###################################################################### ### A BETA-BINOMIAL MODEL FOR OVERDISPERSION (Albert, 2007, Ch. 5) ### ###################################################################### ### tested on R version 2.9.0 (2009-06-15) ### ###################################################################### # set RNG seed for reproducibility of results set.seed(610) # load required package and make data available library(LearnBayes) data(cancermortality) print(cancermortality) mortalityrates <- cancermortality$y / cancermortality$n print(round(100000 * mortalityrates)) # log-likelihood function logLik <- function(etalam, dat) { eta <- etalam[1] lam <- etalam[2] y <- dat[, 1] n <- dat[, 2] N <- nrow(dat) val <- sum(lchoose(dat$n, dat$y) + lbeta(lam * eta + y, lam * (1 - eta) + n - y)) - N * lbeta(lam * eta, lam * (1 - eta)) return(val) } # log-likelihood contour plot pdf("contourEtaLam.pdf") mycontour(logLik, c(0.0001, 0.003, 1, 20000), cancermortality) title(main = "Log-likelihood contour plot", xlab = expression(eta), ylab = expression(lambda)) dev.off() # reparametrised log-likelihood function logLikRep <- function(theta, dat) { eta <- exp(theta[1]) / (1 + exp(theta[1])) lam <- exp(theta[2]) y <- dat[, 1] n <- dat[, 2] N <- nrow(dat) val <- sum(lchoose(dat$n, dat$y) + lbeta(lam * eta + y, lam * (1 - eta) + n - y)) - N * lbeta(lam * eta, lam * (1 - eta)) return(val) } # reparametrised log-likelihood contour plot pdf("contourTheta.pdf") mycontour(logLikRep, c(-8, -4.5, 2.5, 16.5), cancermortality) title(main = "Log-likelihood contour plot", xlab = expression(theta[1]), ylab = expression(theta[2])) dev.off() # plot prior densities pdf("priorEtaLam.pdf", width = 12) # plotting prior densities layout(matrix(1:2, 1, 2)) eta <- seq(0, 1, len = 101) etaden <- dbeta(eta, 0.5, 2) plot(eta, etaden, type="l", main = "Prior", xlab = expression(eta), ylab = "Density") lam <- seq(0, 20, len = 101) lamden <- 1 / (1 + lam)^2 plot(lam, lamden, type="l", main = "Prior", xlab = expression(lambda), ylab = "Density") dev.off() # log-posterior function logPost <- function(theta, dat) { eta <- exp(theta[1]) / (1 + exp(theta[1])) lam <- exp(theta[2]) y <- dat[, 1] n <- dat[, 2] N <- nrow(dat) val <- sum(lchoose(dat$n, dat$y) + lbeta(lam * eta + y, lam * (1 - eta) + n - y)) - N * lbeta(lam * eta, lam * (1 - eta)) val <- val + 0.5 * theta[1] - 2.5 * log(1 + exp(theta[1])) + theta[2] - 2 * log(1 + exp(theta[2])) return(val) # up to a constant term } # log-posterior contour plot pdf("contourPost.pdf") mycontour(logPost, c(-8, -4.5, 2.5, 16.5), cancermortality) title(main = "Log-posterior contour plot", xlab = expression(theta[1]), ylab = expression(theta[2])) dev.off() # compute Laplace approximation lapfit <- optim(c(-5, 10), logPost, hessian = TRUE, method = "BFGS", control = list(fnscale = -1), dat = cancermortality) thetahat <- lapfit$par thetavar <- chol2inv(chol(-lapfit$hessian)) # Laplace approximation contour plot pdf("contourLaplace.pdf") mycontour(lbinorm, c(-8, -4.5, 2.5, 16.5), list(m = thetahat, v = thetavar)) title(main = "Laplace approximation contour plot", xlab = expression(theta[1]), ylab = expression(theta[2])) dev.off() # 95% posterior credible interval for eta based on the Laplace approximation thetacred <- cbind(thetahat - 2 * sqrt(diag(thetavar)), thetahat + 2 * sqrt(diag(thetavar))) etacred <- exp(thetacred[1, ]) / (1 + exp(thetacred[1, ])) print(round(100000 * etacred)) # compute marginal likelihood based on the Laplace approximation logmarlik <- 0.5 * length(thetahat) * log(2 * pi) + 0.5 * log(det(thetavar)) + logPost(thetahat, cancermortality) print(round(logmarlik, 1)) # compute marginal likelihood under the binomial model logmarlik0<- lbeta(0.5 + sum(cancermortality$y), 2 + sum(cancermortality$n) - sum(cancermortality$y)) - lbeta(0.5, 2) + sum(lchoose(cancermortality$n, cancermortality$y)) print(round(logmarlik0, 1)) # rejection sampling M <- 1000 # number of draws proposal <- list(mean = thetahat, var = 2 * thetavar, df = 4) deltaFun <- function(theta, dat, tpar) {logPost(theta, dat) - dmt(theta, mean = tpar$mean, S = tpar$var, df = tpar$df)} logC <- optim(c(-5, 10), deltaFun, method = "BFGS", control = list(fnscale = -1), dat = cancermortality, tpar = proposal) thetastar <- logC$par cat("Logarithm of the rejection sampling constant:", logC$value, "at", round(thetastar, 1), fill = TRUE) thetarej <- rejectsampling(logPost, tpar = proposal, dmax = logC$value, n = M, data = cancermortality) acceptratio <- nrow(thetarej) / M cat("Acceptance ratio:", round(acceptratio, 3), fill = TRUE) # rejection sampling contour plot pdf("contourRS.pdf") mycontour(logPost, c(-8, -4.5, 2.5, 16.5), cancermortality) points(thetarej[, 1], thetarej[, 2], pch = 20) title(main = "Posterior values via rejection sampling", xlab = expression(theta[1]), ylab = expression(theta[2])) abline(v = thetastar[1], lty = "dashed") abline(h = thetastar[2], lty = "dashed") dev.off() # posterior mean of eta based on rejection sampling etarej <- exp(thetarej[, 1]) / (1 + exp(thetarej[, 1])) etabar <- mean(etarej) etabarSE <- sd(etarej) / sqrt(length(etarej)) # i.i.d. standard error cat(etabar, "+/-", etabarSE, fill = TRUE) # 95% posterior credible interval for eta based on rejection sampling etacredrej <- quantile(etarej, c(0.025, 0.975)) print(round(etacredrej, 5)) # importance sampling g <- function(theta) exp(theta[1]) / (1 + exp(theta[1])) gthetaimp <- impsampling(logPost, tpar = proposal, h = g, n = M, data = cancermortality) cat(gthetaimp$est, "+/-", gthetaimp$se, fill = TRUE) weights <- gthetaimp$wt # boxplot of importance weights pdf("impWTboxplot.pdf") boxplot(weights) title(main = "Boxplot of importance weights") dev.off() # sampling importance resampling thetasir <- sir(logPost, tpar = proposal, n = M, data = cancermortality) etacredsir <- quantile(g(thetasir), c(0.025, 0.975)) print(round(etacredsir, 5)) # Bayesian influence function for eta influenceOnEta <- function (theta, dat) { y <- dat[, 1] n <- dat[, 2] N <- nrow(dat) m <- nrow(theta) eta <- exp(theta[, 1]) / (1 + exp(theta[, 1])) lam <- exp(theta[, 2]) summary <- quantile(eta, c(0.05, 0.5, 0.95)) summary.obs <- matrix(NA, N, 3) for (j in 1:N) { weight <- exp(lbeta(lam * eta, lam * (1 - eta)) - lbeta(lam * eta + y[j], lam * (1 - eta) + n[j] - y[j])) indices <- sample(1:m, size = m, prob = weight, replace = TRUE) theta.s <- theta[indices, ] summary.obs[j, ] <- quantile(exp(theta.s[, 1]) / (1 + exp(theta.s[, 1])), c(0.05, 0.5, 0.95)) } return(list(summary = summary, summary.obs = summary.obs)) } # use rejection sample etainfluence <- influenceOnEta(thetarej, cancermortality) # Bayesian sensitivity plot for eta pdf("sensitivityEta.pdf") plot(rep(0, 3), etainfluence$summary, type = "b", lwd = 3, xlim = c(-1, 21), ylim = c(0, 0.003), xlab = "Observation removed", ylab = expression(eta), main = "Sensitivity plot") for(j in 1:20) lines(rep(j,3), etainfluence$summary.obs[j,], type = "b") rm(j) dev.off()