## Metropolis-Hastings for logistic model
## Author: Andrea Riebler (andrea.riebler*at*ifspm.uzh.ch)
## Date: April 2012
## Data from Bliss CI (1935), The Annals of Applied Biology, 22: 134-167

# the covariate values (dose)
x_original <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)

# the centered covariate values
x <- x_original - mean(x_original)

# number of beetles killed
y <- c(6, 13, 18, 28, 52, 53, 61, 60)

# total number of beetles
n <- c(59, 60, 62, 56, 63, 59, 62, 60)

# variance of the normal priors
sigma <- 10^4

# inverse logit: logit^(-1)(alpha + beta*x)
mypi <- function(alpha, beta, x){
    tmp <- exp(alpha + beta*x)
    pi <- tmp/(1+tmp)
    return(pi)
}

# unnormalized log posterior: binomial log-likelihood plus independent
# normal priors for alpha and beta
log.posterior <- function(par, y, n, x){
    sigma <- 10^4
    return(sum(dbinom(y, size=n, prob=mypi(par[1], par[2], x), log=TRUE)) +
           dnorm(par[1], mean=0, sd=sqrt(sigma), log=TRUE) +
           dnorm(par[2], mean=0, sd=sqrt(sigma), log=TRUE))
}
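## Side note (not part of the original script): exp(alpha + beta*x) can
## overflow for large positive arguments, in which case mypi() returns NaN.
## A minimal sketch of a numerically stable alternative using the built-in
## logistic distribution function plogis(), which evaluates the inverse
## logit 1/(1 + exp(-q)) directly:
mypi_stable <- function(alpha, beta, x){
    plogis(alpha + beta*x)
}
# quick sanity check: both versions agree where mypi() is well-behaved
stopifnot(isTRUE(all.equal(mypi(0.5, 30, x), mypi_stable(0.5, 30, x))))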
# get an idea of the posterior by evaluating it on a grid
posterior <- function(alpha_vec, beta_vec, y, n, x){
    len_a <- length(alpha_vec)
    len_b <- length(beta_vec)
    res <- matrix(NA, len_a, len_b)
    for(i in 1:len_a){
        for(j in 1:len_b){
            res[i,j] <- exp(log.posterior(c(alpha_vec[i], beta_vec[j]), y, n, x))
        }
    }
    return(res)
}

alpha <- seq(0.4, 1.1, length.out=100)
beta <- seq(25, 45, length.out=100)
z <- posterior(alpha, beta, y, n, x)

par(mfrow=c(1,2))
contour(alpha, beta, z, xlab=expression(alpha), ylab=expression(beta))
persp(alpha, beta, z, theta=30, phi=30, expand=0.5, col="lightblue",
      ltheta=120, shade=0.75, ticktype="detailed",
      xlab="alpha", ylab="beta", zlab="posterior")

#####################################
## MCMC settings                   ##
#####################################

# number of MCMC iterations
niter <- 30000
# burn-in length
burnin <- 5000

######################################
## univariate random walk proposals ##
######################################

alpha_samples <- c()
beta_samples <- c()

# number of accepted proposals
alpha_yes <- 0
beta_yes <- 0

# starting values
alpha <- 0
beta <- 0

# optimized standard deviations for the normal proposals
s_alpha <- 0.45
s_beta <- 6

# thinning parameter
k <- 1
# iteration counter
count <- 0

# start the MCMC algorithm (the first iteration after the burn-in is 1)
for(i in -burnin:niter){
    count <- count + 1

    ## update alpha
    # generate a new proposal for alpha
    alpha_star <- rnorm(1, alpha, sd=s_alpha)
    # NOTE: it is more stable to calculate everything on the log scale
    enum <- sum(dbinom(y, size=n, prob=mypi(alpha_star, beta, x), log=TRUE)) +
            dnorm(alpha_star, mean=0, sd=sqrt(sigma), log=TRUE)
    denom <- sum(dbinom(y, size=n, prob=mypi(alpha, beta, x), log=TRUE)) +
             dnorm(alpha, mean=0, sd=sqrt(sigma), log=TRUE)
    # log acceptance rate (since we use a symmetric random walk proposal
    # there is no proposal ratio in the acceptance probability)
    logacc <- enum - denom
    if(log(runif(1)) <= logacc){
        # accept the proposed value
        alpha <- alpha_star
        alpha_yes <- alpha_yes + 1
    }

    ## update beta
    # generate a new proposal for beta
    beta_star <- rnorm(1, beta, sd=s_beta)
    enum <- sum(dbinom(y, size=n, prob=mypi(alpha, beta_star, x), log=TRUE)) +
            dnorm(beta_star, mean=0, sd=sqrt(sigma), log=TRUE)
    denom <- sum(dbinom(y, size=n, prob=mypi(alpha, beta, x), log=TRUE)) +
             dnorm(beta, mean=0, sd=sqrt(sigma), log=TRUE)
    # log acceptance rate
    logacc <- enum - denom
    if(log(runif(1)) <= logacc){
        # accept the proposed value
        beta <- beta_star
        beta_yes <- beta_yes + 1
    }

    # after the burn-in save every kth sample
    if((i > 0) && (i %% k == 0)){
        alpha_samples <- c(alpha_samples, alpha)
        beta_samples <- c(beta_samples, beta)
    }
    if(i %% 1000 == 0){
        # print the acceptance rates on the fly
        cat(c(i, alpha_yes/count, beta_yes/count), "\n")
    }
}

library(MASS)

#pdf("graphics/beetle_diag_unif.pdf", width=10, height=7)
par(mfrow=c(3,2), mar=c(5,5,1,1), cex.lab=1.4, cex.axis=1.3)
plot(alpha_samples, type="l")
plot(beta_samples, type="l")
truehist(alpha_samples)
truehist(beta_samples)
acf(alpha_samples)
acf(beta_samples)
#dev.off()

alpha <- seq(0.4, 1.1, length.out=100)
beta <- seq(25, 45, length.out=100)
z <- posterior(alpha, beta, y, n, x)

#pdf("graphics/beetle_update_unif.pdf", height=6, width=6)
par(cex.lab=1.7, cex.axis=1.6, cex.main=2, mar=c(5,5,4,1))
contour(alpha, beta, z, xlab=expression(alpha), ylab=expression(beta),
        main="Univariate update", lwd=2)
## show the sample path; each univariate update moves parallel to one axis,
## so duplicate the coordinates to draw the intermediate steps
ns <- 1000
a_unif <- rep(alpha_samples[1:ns], each=2)[-1]
b_unif <- rep(beta_samples[1:ns], each=2)[-(2*ns)]
lines(a_unif, b_unif, type="l", col=2, lwd=1.2)
#dev.off()
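## Side note (not part of the original script): a minimal sketch of some
## numerical summaries; it assumes the coda package is installed, though any
## MCMC diagnostics package would do. Effective sample sizes far below
## length(alpha_samples) reflect the autocorrelation seen in the acf() plots.
library(coda)
effectiveSize(as.mcmc(cbind(alpha_samples, beta_samples)))
# posterior medians and equi-tailed 95% credible intervals
quantile(alpha_samples, probs=c(0.025, 0.5, 0.975))
quantile(beta_samples, probs=c(0.025, 0.5, 0.975))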
######################################
## bivariate random walk proposals  ##
######################################

library(mvtnorm)

# acceptance counter
param_yes <- 0
# initial values
param <- c(0, 0)
alpha_samples <- c()
beta_samples <- c()

# use a normal proposal with covariance matrix proportional to the negative
# inverse curvature of the log posterior at the posterior mode
postOptim <- optim(c(0, 0), fn=log.posterior, y=y, n=n, x=x,
                   control=list(fnscale=-1), hessian=TRUE)
postMode <- postOptim$par
postVar <- solve(-postOptim$hessian)

# tuning parameter to achieve "good" acceptance rates
varFactor <- 2
proposalVar <- varFactor * postVar

count <- 0
# start the MCMC algorithm
for(i in -burnin:niter){
    # iteration counter
    count <- count + 1

    ## NOTE: here there is only one acceptance step, as alpha and beta are
    ## updated jointly
    # generate a new (bivariate) proposal
    param_star <- rmvnorm(1, mean=param, sigma=proposalVar)
    # we need the full posterior distribution!
    enum <- sum(dbinom(y, size=n, prob=mypi(param_star[1], param_star[2], x), log=TRUE)) +
            dnorm(param_star[1], mean=0, sd=sqrt(sigma), log=TRUE) +
            dnorm(param_star[2], mean=0, sd=sqrt(sigma), log=TRUE)
    denom <- sum(dbinom(y, size=n, prob=mypi(param[1], param[2], x), log=TRUE)) +
             dnorm(param[1], mean=0, sd=sqrt(sigma), log=TRUE) +
             dnorm(param[2], mean=0, sd=sqrt(sigma), log=TRUE)
    # log acceptance rate
    logacc <- enum - denom
    if(log(runif(1)) <= logacc){
        # accept both parameters jointly
        param <- param_star
        param_yes <- param_yes + 1
    }

    # after the burn-in save every kth sample
    if((i > 0) && (i %% k == 0)){
        alpha_samples <- c(alpha_samples, param[1])
        beta_samples <- c(beta_samples, param[2])
    }
    if(i %% 1000 == 0){
        # print the acceptance rate on the fly
        cat(c(i, param_yes/count), "\n")
    }
}

#pdf("graphics/beetle_diag_biv.pdf", width=10, height=7)
par(mfrow=c(3,2), mar=c(5,5,1,1), cex.lab=1.4, cex.axis=1.3)
plot(alpha_samples, type="l")
plot(beta_samples, type="l")
truehist(alpha_samples)
truehist(beta_samples)
acf(alpha_samples)
acf(beta_samples)
#dev.off()

alpha <- seq(0.4, 1.1, length.out=100)
beta <- seq(25, 45, length.out=100)
z <- posterior(alpha, beta, y, n, x)

#pdf("graphics/beetle_update_biv.pdf", height=6, width=6)
par(cex.lab=1.7, cex.axis=1.6, cex.main=2, mar=c(5,5,4,1))
contour(alpha, beta, z, xlab=expression(alpha), ylab=expression(beta),
        main="Bivariate update", lwd=2)
## show the sample path
ns <- 1000
lines(alpha_samples[1:ns], beta_samples[1:ns], type="l", col=2, lwd=1.2)
#dev.off()

## fit a generalized linear model to compare
m1 <- glm(formula = cbind(y, n - y) ~ x, family = binomial)
summary(m1)
vcov(m1)

## Note: assuming separate proposal variances for alpha and beta should
## improve the mixing

## Plot the dose-response curve
# get a grid of covariate values on the original (uncentered) dose scale
x <- seq(1.60, 1.88, by=0.01)

# inverse logit on the original dose scale, where m is the centering
# constant (this overwrites the earlier 3-argument version of mypi)
mypi <- function(alpha, beta, x, m){
    tmp <- exp(alpha + beta*(x - m))
    pi <- tmp/(1+tmp)
    return(pi)
}

# number of samples
len <- length(alpha_samples)
# take the last 1000 samples
alpha_plot <- alpha_samples[(len - 1000 + 1):len]
beta_plot <- beta_samples[(len - 1000 + 1):len]

# plot the dose-response curves
#pdf("graphics/dose_response_curve.pdf", width=7, height=5)
par(mar=c(5,5,1,1), cex.lab=1.5, cex.axis=1.5)
plot(x, mypi(alpha_plot[1], beta_plot[1], x, mean(x_original)),
     xlab="Dosage", ylab="Proportion dying", type="l")
for(i in 2:1000){
    lines(x, mypi(alpha_plot[i], beta_plot[i], x, mean(x_original)),
          type="l", col="grey")
}
# plot a separate curve based on the posterior means
lines(x, mypi(mean(alpha_samples), mean(beta_samples), x, mean(x_original)),
      type="l", col=2, lwd=2)
# add the observed proportions
points(x_original, y/n)
#dev.off()
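## Side note (not part of the original script): a minimal sketch of the
## posterior of the LD50, the dose at which half of the beetles are expected
## to die. Since pi = 0.5 exactly when alpha + beta*(dose - m) = 0, each
## posterior draw maps to LD50 = m - alpha/beta with m = mean(x_original).
ld50_samples <- mean(x_original) - alpha_samples/beta_samples
# posterior median and equi-tailed 95% credible interval for the LD50
quantile(ld50_samples, probs=c(0.025, 0.5, 0.975))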