HSI
From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press
(c) 2017, Joseph M. Hilbe, Rafael S. de Souza and Emille E. O. Ishida
you are kindly asked to include the complete citation if you used this material in a publication
​
​
​
​
Code 10.4 Gaussian linear mixed model, in Python using Stan, for modeling the relationship between type Ia supernovae host galaxy mass and Hubble residuals
==================================================================================
import numpy as np
import pandas as pd
import pystan
​
# Data
path_to_data = 'https://raw.githubusercontent.com/astrobayes/BMAD/master/data/Section_10p2/HR.csv'
data_frame = dict(pd.read_csv(path_to_data))
​
# prepare data for Stan
data = {}
data['obsx'] = np.array(data_frame['LogMass'])
data['errx'] = np.array(data_frame['e_LogMass'])
data['obsy'] = np.array(data_frame['HR'])
data['erry'] = np.array(data_frame['e_HR'])
data['type'] = np.array([1 if item == 'P' else 0
for item in data_frame['Type']])
data['N'] = len(data['obsx'])
data['K'] = 2 # number of distinct populations
data['L'] = 2 # number of coefficients
​
# Fit
stan_code="""
data{
int<lower=0> N; # number of data points
int<lower=0> K; # number of distinct populations
int<lower=0> L; # number of coefficients
vector[N] obsx; # obs host galaxy mass
vector<lower=0>[N] errx; # errors in host mass measurements
vector[N] obsy; # obs Hubble Residual
vector<lower=0>[N] erry; # errors in Hubble Residual measurements
vector[N] type; # flag for spec/photo sample
}
parameters{
matrix[K,L] beta; # linear predictor coefficients
real<lower=0> sigma; # scatter around true black hole mass
vector[N] x; # true host galaxy mass
vector[N] y; # true Hubble Residuals
real<lower=0, upper=5> sig0; # scatter for shared hyperprior on beta
real mu0; # mean for shared hyperprior on beta
}
transformed parameters{
vector[N] mu; # linear predictor
for (i in 1:N) {
if (type[i] == type[1]) mu[i] = beta[1,1] + beta[2,1] * x[i];
else mu[i] = beta[1,2] + beta[2,2] * x[i];
}
}
model{
# shared hyperprior
mu0 ~ normal(0, 1);
sig0 ~ normal(0, 5);
for (i in 1:K){
for (j in 1:L) beta[i,j] ~ normal(mu0, sig0);
}
​
# priors and likelihood
obsx ~ normal(x, errx);
x ~ normal(0, 10);
y ~ normal(mu, sigma);
sigma ~ gamma(0.5,0.5);
​
obsy ~ normal(y, erry);
}
"""
​
# Run mcmc
fit = pystan.stan(model_code=stan_code, data=data, iter=40000, chains=3,
warmup=15000, thin=1, n_jobs=3)
​
# Output
nlines = 10 # number of lines in screen output
​
output = str(fit).split('\n')
for item in output[:nlines]:
print(item)
==================================================================================
Output on screen:
​
Inference for Stan model: anon_model_1a0e6a77727e9765f27eaf68294c6a0a.
3 chains, each with iter=40000; warmup=15000; thin=1;
post-warmup draws per chain=25000, total post-warmup draws=75000.
​
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta[0,0] 0.82 1.9e-3 0.27 0.27 0.65 0.83 1.01 1.34 20282 1.0
beta[1,0] -0.08 1.8e-4 0.03 -0.13 -0.1 -0.08 -0.06 - 0.03 20321 1.0
beta[0,1] 0.24 9.1e-4 0.18 -0.12 0.12 0.24 0.36 0.6 39828 1.0
beta[1,1] -0.02 8.8e-5 0.02 -0.06 -0.03 -0.02 -0.01 0.01 39753 1.0
sigma 0.12 6.5e-5 9.0e-3 0.1 0.11 0.12 0.13 0.14 18993 1.0