############################################## ### BINARY PROBIT REGRESSION via MH ### ############################################## ### tested on R version 2.9.0 (2009-06-15) ### ############################################## # Package by M. Plummer, N. Best, K. Cowles and K. Vines: output analysis and diagnostics for MCMC library(coda) # Package by J. Albert: functions for learning Bayesian inference library(LearnBayes) # note that package 'coda' requires package 'lattice' ######## Data analyzed by Albert (2007, 10.3) ######## dataname <- "Donner survival data" data(donner) ### maximum likelihood fit for a model of interest ### mlfit <- glm(survival ~ age + male, data = donner, family = binomial(link = probit), x = TRUE, y = TRUE) ############### prior hyper-parameters ############### g <- 45 T0 <- (1/g) * t(mlfit$x) %*% mlfit$x b0 <- rep(0, ncol(mlfit$x)) ################ MCMCparameters ################ mclen <- 10000 df <- 10 ################################################ # Bayesian probit (regression) via Metropolis-Hastings # using taylored proposal by Chib & Greenberg (1995) bprobitMHtp <- function( y, # vector of responses X, # design matrix m, # number of simulations desired b0 = rep(0, ncol(X)), # prior mean vector of regression coefficients T0 = diag(0, ncol(X)), # prior precision matrix of regression coefficients df = 1 # degrees of freedom of Student proposal ) { # non-normalized posterior density logpost <- function(beta, y, X, b0, T0) { z <- X %*% beta return(sum(y * pnorm(z, log = TRUE) + (1 - y) * pnorm(z, log = TRUE, lower = FALSE)) - 0.5 * t(beta - b0) %*% T0 %*% (beta - b0)) } # posterior mode and inverse negative hessian fit <- optim(b0, logpost, hessian = TRUE, control = list(fnscale = -1), y = y, X = X, b0 = b0, T0 = T0) betahat <- fit$par V <- chol2inv(chol(-fit$hessian)) # number of regression coefficients k <- ncol(X) # empty chain beta <- matrix(NA, m, k) # initial value beta[1,] <- betahat # Metropolis-Hastings for(i in 2:m) { betaProp <- as.vector(rmt(1, mean = betahat, S = V, df = df)) logAccept <- logpost(betaProp, y = y, X = X, b0 = b0, T0 = T0) - logpost(beta[i-1,], y = y, X = X, b0 = b0, T0 = T0) + dmt(beta[i-1,], mean = betahat, S = V, df = df, log = TRUE) - dmt(betaProp, mean = betahat, S = V, df = df, log = TRUE) if(-rexp(1) < logAccept) beta[i,] <- betaProp else beta[i,] <- beta[i-1,] } # return the beta chain return(list(beta = beta)) } # Bayesian fit show(system.time(bfit <- bprobitMHtp(mlfit$y, mlfit$x, mclen, b0 = b0, T0 = T0, df = df))) # parameter summaries parsum <- matrix(NA, ncol(mlfit$x), 8) rownames(parsum) <- colnames(mlfit$x) colnames(parsum) <- c("PriorMean", "PriorSD","PostMean", "PostSD","Post0.025", "Post0.975", "IF(AT)","MLE") # prior values if(1/g == 0) { parsum[,2] <- Inf # reference analysis }else { parsum[,1] <- b0 # prior mean parsum[,2] <- sqrt(diag(chol2inv(chol(T0)))) # prior std dev } # posterior values parsum[,3] <- apply(bfit$beta, 2, mean) parsum[,4] <- apply(bfit$beta, 2, sd) parsum[,c(5,6)] <- t(apply(bfit$beta, 2, quantile, c(0.025, 0.975))) # inefficiency factor (ratio of actual to effective sample size) parsum[,7] <- mclen / apply(bfit$beta, 2, effectiveSize) # maximum likelihood estimate parsum[,8] <- coef(mlfit) # text output to file sink("binProRegMH.txt") cat("\n") cat("Bayesian analysis of\n ", dataname, "\n") cat("using model\n ") show(mlfit$formula) cat("and Monte Carlo sample size\n ", mclen, "\n") cat("Parameter summaries:\n") show(parsum) cat("Marginal likelihood not available") cat("\n") sink() # graphical output to file pdf("binProRegMH.pdf", height = 12, width = 12) plot(mcmc(bfit$beta)) autocorr.plot(mcmc(bfit$beta)) dev.off()