Bayesian zero-inflated negative binomial model in Python using Stan

From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press

You are kindly asked to include the complete citation if you use this material in a publication.

Code 7.5 Bayesian zero-inflated negative binomial model in Python using Stan

=================================================================

import numpy as np
import pystan
import statsmodels.api as sm

from rpy2.robjects import r, FloatVector
from scipy.stats import uniform, bernoulli

def gen_zinegbinom(N, mu1, mu2, alpha):
    """Draw zero-inflated negative binomial variates with R's VGAM package.

    Parameters
    ----------
    N : int
        Number of draws; passed to ``rzinegbin`` as ``n``.
    mu1 : sequence of float
        Negative binomial means; passed to ``rzinegbin`` as ``munb``.
    mu2 : sequence of float
        Structural-zero probabilities; passed to ``rzinegbin`` as ``pstr0``.
    alpha : float
        Dispersion parameter; ``rzinegbin`` receives ``size = 1.0 / alpha``.

    Returns
    -------
    numpy.ndarray
        The draws returned by R, each converted with ``int``.

    Raises
    ------
    RuntimeError
        If the R package VGAM cannot be loaded.
    """
    # require() returns FALSE instead of raising when the package is missing,
    # so check its result and fail here with a clear message rather than at
    # the function lookup below.
    if not r('require(VGAM)')[0]:
        raise RuntimeError('R package VGAM is required but could not be loaded')

    # fetch the R sampler and call it with vectorised mean / zero-probability
    rzinegbin = r['rzinegbin']
    draws = rzinegbin(n=N, munb=FloatVector(mu1), size=1.0/alpha,
                      pstr0=FloatVector(mu2))

    return np.array([int(value) for value in draws])

# Data
# Two random number generators are involved: NumPy (used by scipy.stats for
# the covariates) and R (used by VGAM to draw the response). Both must be
# seeded for the example to be replicable.
np.random.seed(141)                 # seed for the covariate draws
r('set.seed(141)')                  # seed for the response draws made in R
nobs = 7500                         # number of obs in model

x1 = uniform.rvs(size=nobs)
x2 = bernoulli.rvs(0.6, size=nobs)

xb = 1.0 + 2.0 * x1 + 1.5 * x2      # linear predictor, count component
xc = 2.0 - 5.0 * x1 + 3.0 * x2      # linear predictor, zero-inflation component

exb = np.exp(xb)                    # negative binomial mean
exc = 1 / (1 + np.exp(-xc))         # inverse logit: structural-zero probability
alpha = 2                           # dispersion; the generator uses size = 1/alpha

# create y as adjusted
zinby = gen_zinegbinom(nobs, exb, exc, alpha)

# design matrix: constant column plus x1 and x2
X = np.column_stack((x1, x2))
X = sm.add_constant(X)

# build data dictionary; both model components share the same design matrix
mydata = {
    'Y': zinby,                     # response variable
    'N': nobs,                      # sample size
    'Xb': X,                        # predictors
    'Xc': X,
    'Kb': X.shape[1],               # number of coefficients
    'Kc': X.shape[1],
}

# Fit
# Stan program for the zero-inflated negative binomial model.
#   data:        N, Kb, Kc, design matrices Xb (N x Kb) and Xc (N x Kc),
#                and the integer response Y.
#   parameters:  beta (length Kc), gamma (length Kb), alpha (constrained >= 0).
#   transformed: mu = exp(Xc * beta) and Pi = inv_logit(Xb * gamma).
#   model:       for Y[i] == 0 the contribution is the log_sum_exp of
#                bernoulli_lpmf(1 | Pi) and bernoulli_lpmf(0 | Pi) plus the
#                negative binomial term; for Y[i] > 0 it is
#                bernoulli_lpmf(0 | Pi) plus the negative binomial term.
#                neg_binomial_2_lpmf receives 1/alpha as its second argument.
#                The model block contains no prior statements.
# NOTE: inside the Stan program beta is paired with Xc and gamma with Xb,
# whereas the data section above uses xb for the count predictor and xc for
# the zero-inflation predictor. Both matrices are the same X in mydata, so
# the swap in naming does not affect the fit.
stan_code = """
data{
int N;
int Kb;
int Kc;
matrix[N, Kb] Xb;
matrix[N, Kc] Xc;
int Y[N];
}
parameters{
vector[Kc] beta;
vector[Kb] gamma;
real<lower=0> alpha;

}
transformed parameters{
vector[N] mu;
vector[N] Pi;

mu = exp(Xc * beta);
for (i in 1:N) Pi[i] = inv_logit(Xb[i] * gamma);
}
model{
vector[N] LL;

for (i in 1:N) {
if (Y[i] == 0) {
LL[i] = log_sum_exp(bernoulli_lpmf(1| Pi[i]),
bernoulli_lpmf(0| Pi[i]) +
neg_binomial_2_lpmf(Y[i]| mu[i], 1/alpha));
} else {
LL[i] = bernoulli_lpmf(0| Pi[i]) +
neg_binomial_2_lpmf(Y[i]| mu[i], 1/alpha);
}
}
target += LL;
}
"""

# Run mcmc
# 3 chains of 7000 iterations each, the first 3500 of every chain being warmup.
# n_jobs=3 presumably runs the chains in parallel -- confirm against the
# installed PyStan version.
fit = pystan.stan(model_code=stan_code, data=mydata, iter=7000, chains=3,
warmup=3500, n_jobs=3)

# Output
# Show only the head of the fit summary: the first `nlines` lines of its
# string representation.
nlines = 12                                   # number of lines in screen output

output = str(fit).split('\n')
print('\n'.join(output[:nlines]))

=================================================================

GET SOURCE

Output on screen:

Inference for Stan model: anon_model_9dd78dbc1f3c1ddb1d2ba6cf92010d52.
3 chains, each with iter=7000; warmup=3500; thin=1;
post-warmup draws per chain=3500, total post-warmup draws=10500.

           mean se_mean    sd   2.5%    25%    50%    75%  97.5%   n_eff  Rhat
beta[0]    1.16  1.3e-3   0.1   0.97   1.09   1.16   1.22   1.35  5372.0   1.0
beta[1]    1.87  1.8e-3  0.13   1.61   1.78   1.87   1.96   2.13  5433.0   1.0
beta[2]    1.36  7.6e-4  0.07   1.22   1.31   1.35    1.4   1.49  7996.0   1.0
gamma[0]   2.09  1.2e-3   0.1    1.9   2.03   2.09   2.15   2.28  6212.0   1.0
gamma[1]  -4.97  2.5e-3  0.19  -5.33  -5.09  -4.97  -4.84  -4.61  5331.0   1.0
gamma[2]   2.94  1.5e-3  0.11   2.73   2.87   2.94   3.02   3.16  5371.0   1.0
alpha      1.85  1.2e-3  0.09   1.67   1.78   1.85   1.91   2.04  6520.0   1.0

HSI

HSI