############################################## ### BINARY PROBIT REGRESSION ### ############################################## ### tested on R version 2.9.0 (2009-06-15) ### ############################################## # Package by M. Plummer, N. Best, K. Cowles and K. Vines: output analysis and diagnostics for MCMC library(coda) # Package by J. Albert: functions for learning Bayesian inference library(LearnBayes) # note that package 'coda' requires package 'lattice' ######## Data analyzed by Albert (2007, 10.3) ######## dataname <- "Donner survival data" data(donner) ### maximum likelihood fit for a model of interest ### mlfit <- glm(survival ~ age + male, data = donner, family = binomial(link = probit), x = TRUE, y = TRUE) ############### prior hyper-parameters ############### g <- 45 T0 <- (1/g) * t(mlfit$x) %*% mlfit$x b0 <- rep(0, ncol(mlfit$x)) ########### length of Monte Carlo samples ############ mclen <- 10000 ###################################################### # Bayesian probit (regression) # MCMC algorithm based on Albert & Chib (1993) # (log) marginal likelihood computation based on Chib (1995) bprobit <- function( y, # vector of responses X, # design matrix m, # number of simulations desired b0 = rep(0, ncol(X)), # prior mean vector of regression coefficients T0 = diag(0, ncol(X)) # prior precision matrix of regression coefficients ) { # number of regression coefficients k <- ncol(X) # number of observations n <- nrow(X) # empty chains beta <- matrix(NA, k, m) rownames(beta) <- colnames(X) z <- matrix(NA, n, m) # initial values fit <- glm(y ~ 0 + X, family = binomial(link = probit)) beta[,1] <- fit$coef z[,1] <- X %*% beta[,1] # posterior variance matrix B1 <- chol2inv(chol(T0 + t(X) %*% X)) # Gibbs sampling for (i in 2:m) { # regression coefficients b1 <- B1 %*% (T0 %*% b0 + t(X) %*% z[,i-1]) beta[,i] <- rmnorm(1, mean = b1, varcov = B1) # latent variables mu <- X %*% beta[,i] aux <- pnorm(-mu) z[,i] <- qnorm((aux * (1 - y) + (1 - aux) * y) * runif(n) + aux * y) + mu } # marginal likelihood from the Gibbs output betaStar <- apply(beta, 1, mean) psiStar <- pnorm(X %*% betaStar) # full conditional for selected parameter value fullCondBetaStar <- rep(NA, m) for(i in 1:m) { b1 <- B1 %*% (T0 %*% b0 + t(X) %*% z[,i]) fullCondBetaStar[i] <- dmnorm(betaStar, mean = b1, varcov = B1) } fullCondBetaMean <- mean(fullCondBetaStar) # marginal likelihood on the log scale logmarlik <- dmnorm(betaStar, mean = b0, varcov = chol2inv(chol(T0)), log = TRUE) - log(fullCondBetaMean) for(i in 1:n) logmarlik <- logmarlik + dbinom(y[i], size = 1, prob = psiStar[i], log = TRUE) names(logmarlik) <- NULL # numerical standard error logmarSE <- sd(fullCondBetaStar) / (sqrt(effectiveSize(fullCondBetaStar)) * fullCondBetaMean) # return the beta chain and the log marginal likelihood (with standard error) return(list(beta = t(beta), logmarlik = logmarlik, logmarSE = logmarSE)) } # Bayesian fit show(system.time(bfit <- bprobit(mlfit$y, mlfit$x, mclen, b0 = b0, T0 = T0))) # parameter summaries parsum <- matrix(NA, ncol(mlfit$x), 8) rownames(parsum) <- colnames(mlfit$x) colnames(parsum) <- c("PriorMean", "PriorSD","PostMean", "PostSD","Post0.025", "Post0.975", "IF(AT)","MLE") # prior values if(1/g == 0) { parsum[,2] <- Inf # reference analysis }else { parsum[,1] <- b0 # prior mean parsum[,2] <- sqrt(diag(chol2inv(chol(T0)))) # prior std dev } # posterior values parsum[,3] <- apply(bfit$beta, 2, mean) parsum[,4] <- apply(bfit$beta, 2, sd) parsum[,c(5,6)] <- t(apply(bfit$beta, 2, quantile, c(0.025, 0.975))) # inefficiency factor (ratio of actual to effective sample size) parsum[,7] <- mclen / apply(bfit$beta, 2, effectiveSize) # maximum likelihood estimate parsum[,8] <- coef(mlfit) # text output to file sink("binProReg.txt") cat("\n") cat("Bayesian analysis of\n ", dataname, "\n") cat("using model\n ") show(mlfit$formula) cat("and Monte Carlo sample size\n ", mclen, "\n") cat("Parameter summaries:\n") show(parsum) cat("Log marginal likelihood:", bfit$logmarlik, "with numerical standard error", bfit$logmarSE) cat("\n") sink() # graphical output to file pdf("binProReg.pdf", height = 12, width = 12) plot(mcmc(bfit$beta)) autocorr.plot(mcmc(bfit$beta)) dev.off()