# ----------------------------------------------------------------------------------------
# Week 1 (lecture 1)
time <- c(31.7, 39.2, 57.5, 65.0, 65.8, 70.0, 75.0, 75.2, 87.5, 88.3, 94.2, 101.7)
dead <- c(1,1,1,0,1,1,0,0,1,0,0,1)
library(survival)
data1 <- Surv(time,dead)
data1
Rhat <- survfit(data1 ~ 1)
plot(Rhat, mark.time = TRUE, ylab="R(t)")
Rhat
print(Rhat, print.rmean = TRUE)
summary(Rhat)
# ----------------------------------------------------------------------------------------
# Week 1 (lecture 2): asymptotic Weibull distribution of the minimum of positive random variables
nsim <- 5000
n <- 1000 # 100, 1000, ...
x <- rlnorm(n*nsim, sdlog=1)
#x <- rexp(n*nsim)
#x <- rgamma(n*nsim, shape=.5)
x <- matrix(x,nsim,n)
xmin <- apply(x,1,min)
# fit a Weibull distribution to see if it fits (writing and maximising the log likelihood function ourselves)
loglik <- function(par,x) -sum(dweibull(x,shape=par[1],scale=par[2],log=TRUE))
weibfit <- optim(c(1,1),loglik,x=xmin)
weibfit
par(mfrow=c(3,1))
# histogram vs fitted f(x)
hist(xmin,prob=TRUE)
curve(dweibull(x, weibfit$par[1], weibfit$par[2]),add=TRUE)
# estimated vs fitted F(x)
plot(ecdf(xmin),col="red")
curve(pweibull(x, weibfit$par[1], weibfit$par[2]),add=TRUE)
# empirical vs theoretical quantiles (QQ-plot)
plot(qweibull(ppoints(xmin), weibfit$par[1], weibfit$par[2]), # theoretical quantiles
     sort(xmin))                                              # empirical quantiles
abline(a=0,b=1)
# hazard function for the lognormal
zlnorm <- function(t,mu,sigma) dlnorm(t,mu,sigma)/(1-plnorm(t,mu,sigma))
curve(zlnorm(t,0,1),xname="t",0,10)
# fitting the lognormal to data via survreg
time <- c(31.7, 39.2, 57.5, 65.0, 65.8, 70.0, 75.0, 75.2, 87.5, 88.3, 94.2, 101.7)
dead <- c(1,1,1,0,1,1,0,0,1,0,0,1)
library(survival)
data1 <- Surv(time,dead)
Rhat <- survfit(data1 ~ 1)
plot(Rhat)
lnormfit <- survreg(data1 ~ 1, dist="lognormal")
summary(lnormfit)
curve(1-plnorm(x, meanlog = lnormfit$coefficients, sdlog=lnormfit$scale),add=TRUE)
# ----------------------------------------------------------------------------------------
# Week 2 (lecture 1)
# Gumbel as asymptotic distribution of the minimum of iid normals
curve(dnorm(x),-10,2,ylim=c(0,2))
for (n in 10^(1:6)) curve(n*(1-pnorm(x))^(n-1)*dnorm(x),add=TRUE,n=1000)
# Gumbel as asymptotic distribution of the maximum of iid exponentials
curve(dexp(x),0,10,ylim=c(0,1))
for (n in 10^(1:6)) curve(n*(pexp(x))^(n-1)*dexp(x),add=TRUE,n=1000)
# Gumbel as asymptotic distribution of the minimum of iid normals (simulation)
nsim <- 5000
n <- 5000 # 100, 1000, ...
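# Note: the next lines draw nsim samples of size n, arrange them as the rows of an
# nsim x n matrix and keep the row-wise minima; with nsim = n = 5000 the matrix holds
# 25 million doubles (roughly 200 MB), so reduce nsim or n if memory is limited.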
x <- rnorm(n*nsim)      # Gumbel holds asymptotically but note the very slow convergence
#x <- -rexp(n*nsim)     # fast convergence
#x <- rgamma(n*nsim, shape=.5)
x <- matrix(x,nsim,n)
xmin <- apply(x,1,min)
# fit a Gumbel (smallest extreme value) distribution to see if it fits, this time using survreg
gumbfit <- survreg(Surv(xmin) ~ 1, dist="extreme")
pgumbel <- function(q,loc=0,scale=1) 1-exp(-exp((q-loc)/scale))
qgumbel <- function(p,loc=0,scale=1) loc + scale * log(-log(1-p))
dgumbel <- function(x,loc=0,scale=1) {x <- (x-loc)/scale; exp(x-exp(x))/scale}
par(mfrow=c(3,1))
hist(xmin,prob=TRUE,breaks=50)
curve(dgumbel(x,gumbfit$coefficients,gumbfit$scale),add=TRUE)
plot(ecdf(xmin),col="red")
curve(pgumbel(x,gumbfit$coefficients,gumbfit$scale),add=TRUE)
plot(sort(xmin), qgumbel(ppoints(xmin), gumbfit$coefficients,gumbfit$scale), cex=.3)
abline(a=0,b=1)
# Fitting the Weibull via the log-location-scale representation used by survreg
time <- rweibull(n=100,shape=3,scale=10) # usual Weibull parameterization
weibfit2 <- survreg(Surv(time) ~ 1, dist="weibull")
summary(weibfit2)
exp(weibfit2$coefficients) # recovered scale parameter from the Gumbel location
1/weibfit2$scale           # and shape parameter from the Gumbel scale

# Week 2, lecture 2
# First simulate some randomly right censored data
n <- 200
alpha <- 2
T <- rweibull(n,shape=alpha,scale=1)
C <- runif(n,0,2*median(T))
# (Y,delta)-representation
time <- pmin(T,C)
delta <- ifelse(time==T,1,0)
treatment <- sample(0:2,replace = TRUE,size=n)
# Kaplan-Meier estimate
Rhat <- survfit(Surv(time,delta)~1)
print(Rhat, print.rmean=TRUE)
plot(Rhat)
# Kaplan-Meier estimate of the survival function R(t) and confidence intervals
# based on assuming normality of Rhat(t), log(Rhat(t)) and log(-log(Rhat(t)))
par(mfrow=c(3,1))
for (i in c("plain","log","log-log")) {
  Rhat <- survfit(Surv(time,delta)~1, conf.type=i)
  plot(Rhat)
  curve(exp(-x^alpha),add=TRUE,col="green") # true survival function
}
# Estimating E(T) = MTTF and median(T) from the incomplete non-parametric estimate of the survival function
abline(a=.5,b=0,col="blue")
print(Rhat, print.rmean=TRUE)
# Nelson-Aalen estimator of Z(t)
# using the number of failures d_i and number at risk n_i computed via survfit
par(mfrow=c(1,1))
Zhat.NA <- cumsum(Rhat$n.event/Rhat$n.risk)
curve(t^alpha, 0, 2, xname="t")   # true cumulative hazard
plot(Rhat$time, Zhat.NA)          # Nelson-Aalen estimate
Zhat.KM <- -log(Rhat$surv)        # Kaplan-Meier based estimate (of Z(t))
points(Rhat$time, Zhat.KM, col="red")
# ----------------------------------------------------------------------------------------
# Week 3, lecture 2
# TTT-plot (see algorithm in slides 6)
# compute the total time on test for all observations
# First sort in increasing order
n <- length(time)
permutation <- order(time)
time <- time[permutation]
delta <- delta[permutation]
Tau <- n*time[1] # total time on test at all censoring and failure times
for (i in 2:n) Tau[i] <- Tau[i-1] + (n-i+1)*(time[i]-time[i-1])
# Compute new Y's containing the TTT at the times of failure.
# Under H0 of exponentiality these are arrival times in a homogeneous Poisson process
Y <- Tau[delta==1]
k <- length(Y)
i <- 1:(k-1)
plot(i/k, Y[i]/Y[k], xlim=c(0,1), ylim=c(0,1))
# hence they behave as order statistics of k-1 iid unif(0,1) variates
# Barlow-Proschan statistic
W <- sum(Y[i]/Y[k])
Z <- (W-(k-1)/2)/sqrt((k-1)/12)
Z
2*pnorm(abs(Z),lower.tail=FALSE)
# ----------------------------------------------------------------------------------------
# Week 4
# log-rank test
library(survival)
library(MASS)
attach(Melanoma)
Melanoma
data <- Surv(time,ifelse(status==1,1,0))
plot(survfit(data~sex),conf.int=TRUE,col=rep(c(2,1),each=3))
legend("topright",c("males","females"),col=c(1,2),lty=1)
survdiff(data ~ sex)
summary(coxph(data ~ sex))
survdiff(Surv(time,delta)~treatment)
# example in Moore, p. 46
tt <- c(6, 7, 10, 15, 19, 25)
delta <- c(1, 0, 1, 1, 0, 1)
trt <- c(0, 0, 1, 0, 1, 1)
survdiff(Surv(tt, delta) ~ trt)
# the same computed "manually"
d0 <- c(1,0,1,0)
e0 <- c(.5,.25,.3333333,0)
v0 <- c(.25,.1875,.222222,0)
(sum(d0-e0))^2/sum(v0)
# difference between the approximation based on the (multi)hypergeometric distribution
# and the alternative test statistic in slides 6
library(survival)
n <- 20
k <- 2
treatment <- sample(1:k,replace = TRUE,size=n)
C <- runif(n,0,5)
alpha <- 1
nsim <- 10000
chisq <- data.frame(hypergeom=double(nsim),multinom=double(nsim))
colnames(chisq) <- c("hypergeom","multinom")
for (i in 1:nsim) {
  T <- rweibull(n,shape=alpha,scale=1)
  time <- pmin(T,C)
  delta <- ifelse(time==T,1,0)
  sdiff <- survdiff(Surv(time,delta)~treatment)
  chisq$hypergeom[i] <- sdiff$chisq
  chisq$multinom[i] <- with(sdiff,sum((obs-exp)^2/exp))
}
par(mfcol=c(3,2))
for (type in names(chisq)) {
  plot(qchisq(ppoints(nsim),df=k-1),sort(chisq[[type]]),pch=".",ylab=type)
  abline(a=0,b=1)
  plot(ecdf(chisq[[type]]))
  curve(pchisq(x,df=k-1),add=TRUE,col="green")
  hist(chisq[[type]],breaks=50,prob=TRUE,xlim=c(0,12),main=type)
  curve(dchisq(x,df=k-1),add=TRUE)
  cat("type I error rate for",type,"approximation:",mean(chisq[[type]]>qchisq(.95,df=k-1)),"\n")
}
# ----------------------------------------------------------------------------------------
# Week 4, lecture 2: maximum likelihood
# Right censored observations from the exponential model
n <- 100
tt <- rexp(n)              # scale parameter theta=1
y <- ifelse(tt>1, 1, tt)   # right censoring at t=1
delta <- ifelse(tt>1, 0, 1)
par(mfrow=c(2,1))
matplot(rbind(0,y),rbind(1:n,1:n),type="l",lty=1,col=1,xlab="time",ylab="unit")
points(y,1:n,pch=ifelse(delta==1,"x","|"))
Rhat <- survfit(Surv(y,delta)~1)
plot(Rhat)
print(Rhat, print.rmean=TRUE)
s <- sum(y)       # total time on test
r <- sum(delta)   # number of failures
thetahat <- s/r   # MLE of theta
se <- thetahat/sqrt(r)
# ----------------------------------------------------------------------------------------
# Week 5, lecture 1: many confidence intervals and one credible interval
# 1) Confidence interval based on asymptotic normality of thetahat
c(thetahat - 1.96*se, thetahat + 1.96*se) # conf. interval for MTTF = ET = theta
# 2) survreg fits the exponential via its log-location-scale representation,
# involving a location parameter mu equal to the log of the scale parameter theta
# of the exponential. The confidence interval for mu = log(theta) is computed
# based on the observed Fisher information and normality of muhat. This can
# then be backtransformed to a confidence interval for theta as follows
exp(confint(survreg(Surv(y,delta) ~ 1, dist="exponential")))
# 3) It is no coincidence that this is exactly the same interval as the one described in
# slides no. 8 (based on asymptotic normality of log(thetahat), with the variance
# obtained from var(thetahat) by the delta method)
thetahat*exp(c(-1,1)*qnorm(.975)/sqrt(r))
# 4) Likelihood based confidence interval (uses 2(l(thetahat)-l(theta)) as pivot)
l <- function(theta) -r*log(theta)-s/theta
ldiff <- function(theta) l(theta) - l(thetahat) + qchisq(.95,df=1)/2
c(uniroot(ldiff,c(0.1,thetahat))$root, uniroot(ldiff,c(thetahat,10))$root)
# 5) Exact interval for type II censoring (the experiment ends at the r'th failure, r prespecified).
# This doesn't really apply here because the censoring time (and not r) is prespecified
2*thetahat*r/qchisq(c(.975,.025),df=2*r)
# 6) Bayesian credible interval obtained via Bayes' theorem using a
# non-informative scale prior on theta (or, equivalently, on the rate lambda).
# This applies regardless of the censoring mechanism, but note the difference in meaning!
2*s/qchisq(c(.975,.025),df=2*r)
# ----------------------------------------------------------------------------------------
# Week 5, lecture 2
# Left censoring, Baboon example, see p. 189 in Moore 2016.
library(ssym)
data("Baboons")
n <- nrow(Baboons)
par(mfrow=c(2,1))
with(Baboons,{
  plot(NA,xlim=c(0,24),ylim=c(1,n))
  matlines(rbind(0,t),rbind(1:n,1:n),col=1,lty=1)
  points(t,1:nrow(Baboons),pch=ifelse(cs==0,"x","<"))
})
Baboons <- within(Baboons,{
  t.right <- t
  t.left <- t
  t.left[cs==1] <- .0001   # left censored observations
  surv <- Surv(t.left,t.right,type="interval2")
})
# First compute a non-parametric estimate (this is outside the scope of the course)
source("https://bioconductor.org/biocLite.R")
biocLite("Icens")
library(interval)
plot(icfit(surv ~ 1, data=Baboons),xlim=c(0,24))
# Next fit a parametric model to the censored data
weibfit <- survreg(surv~1,dist="weibull", Baboons)
curve(1-pweibull(x,scale=exp(weibfit$coefficients),shape=1/weibfit$scale),add=TRUE)
# Left truncated data, example from Moore 2016, section 3.5
tt <- c(7, 6, 6, 5, 2, 4)
delta <- c(0, 1, 0, 0, 1, 1)
backTime <- c(-2, -5, -3, -3, -2, -5)
tm.enter <- -backTime
tm.exit <- tt - backTime
n <- length(tt)
matplot(rbind(tm.enter,tm.exit),rbind(1:n,1:n),type="l",col=1,lty=1,xlim=c(0,max(tm.exit)))
points(tm.exit,1:n,pch=ifelse(delta==1,"x","|"))
points(tm.enter,1:n,pch="(")
# Non-parametric models: Kaplan-Meier estimate of R(t)
Rhat <- survfit(Surv(tm.enter, tm.exit, delta, type="counting") ~ 1)
plot(Rhat)
summary(Rhat)
# Parametric models: survreg in the survival package doesn't handle left truncation
# (nor does MINITAB), but luckily a Swede wrote a package that does.
# This package (eha) uses yet another parameterization of the Weibull
# (the logs of the scale and shape)
library(eha)
weibfit <- aftreg(Surv(tm.enter, tm.exit, delta, type="counting") ~ 1, dist="weibull")
summary(weibfit)
curve(1-pweibull(x,scale=exp(weibfit$coefficients[1]),shape=exp(weibfit$coefficients[2])),add=TRUE)
# ----------------------------------------------------------------------------------------
# Feb 20: Parametric inference for Weibull models with right censored data
gastricXelox <- read.csv("https://www.math.ntnu.no/~jarlet/levetid/gastricXelox.csv")
data <- Surv(gastricXelox$timeWeeks,gastricXelox$delta)
weibfit <- survreg(data ~ 1,dist="weibull")
summary(weibfit)
# our own implementation of the log likelihood function
weib.loglik <- function(par, data) {
  theta <- par[1]
  alpha <- par[2]
  y <- data[,"time"]
  delta <- data[,"status"]
  r <- sum(data[,"status"])
  r*log(alpha) - r*alpha*log(theta) + (alpha-1)*sum(log(y[delta==1])) - sum((y/theta)^alpha)
}
# verify that maximising this yields the same results
opt <- optim(c(1,1),weib.loglik, data=data, control=list(fnscale=-1), hessian=TRUE)
opt$par
c(exp(weibfit$coefficients), 1/weibfit$scale)
# standard errors
sqrt(diag(solve(-opt$hessian)))
# Estimate of ET
EThat <- exp(weibfit$coefficients)*gamma(weibfit$scale + 1)
# Asymptotic estimate of the variance-covariance matrix of the parameters;
# note that this involves yet another parameterization (in terms of the log scale)
vcov(weibfit)
# Delta method
# derivatives of ET with respect to mu and log(scale)
der <- c(EThat, EThat*digamma(weibfit$scale + 1)*weibfit$scale)
varEThat <- t(der) %*% vcov(weibfit) %*% der
seEThat <- sqrt(varEThat) # standard error of EThat
seEThat
print(survfit(data~1),print.rmean=TRUE) # seems reasonable compared to the K-M estimate
# Testing H0: alpha=1 (exponential model) vs H1: alpha != 1
expfit <- survreg(data ~ 1,dist="exponential")
W <- 2*(logLik(weibfit)-logLik(expfit))   # test statistic
W
qchisq(.95, df = 2 - 1)                   # critical value
pchisq(W,df = 1,lower.tail = FALSE)       # p-value
# profile confidence interval for the shape parameter alpha
weib.profile.loglike <- function(alpha, data) {
  y <- data[,"time"]
  delta <- data[,"status"]
  r <- sum(delta)
  theta <- (sum(y^alpha)/r)^(1/alpha)     # mle of theta given alpha
  weib.loglik(c(theta,alpha), data=data)  # log lik. maximised w.r.t. theta (a function of alpha alone)
}
alpha <- seq(.5,1.5,len=100)
ploglik <- numeric(100)
for (i in seq_along(alpha)) {
  ploglik[i] <- weib.profile.loglike(alpha[i], data=data)
}
plot(alpha,ploglik,type="l")
abline(b=0, a=opt$value - qchisq(.95,df=1)/2)
# 2-d (1-alpha) likelihood based confidence set
x <- seq(20, 200, len=100)
y <- seq(.5,2, len=100)
z <- matrix(NA,100,100)
for (i in 1:100) for (j in 1:100) z[i,j] <- weib.loglik(c(x[i],y[j]),data=data)
confset <- contourLines(x, y, z, levels=opt$value - qchisq(0.05, df=2, lower.tail=FALSE)/2)[[1]]
plot(confset$x,confset$y)
# ---------------------------------------------------------------------------------------
# Febr. 23: Ex. cont.
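# (If continuing in a fresh R session, the objects from Feb 20 used below can be recreated:
#  library(survival)
#  gastricXelox <- read.csv("https://www.math.ntnu.no/~jarlet/levetid/gastricXelox.csv")
#  data <- Surv(gastricXelox$timeWeeks, gastricXelox$delta) )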
# Weibull probability plot
weibprobplot <- function(data) {
  Rhat <- survfit(data~1)
  r <- sum(data[,"status"])
  subset <- data[,"status"]==1
  Rhathat <- (Rhat$surv[subset]+c(1,Rhat$surv[subset][-r]))/2
  tfailure <- Rhat$time[subset]
  plot(log(tfailure), log(-log(Rhathat)))
}
# the plot for the gastricXelox data
weibprobplot(data)
# the plot for data that is truly Weibull
time <- rweibull(100,shape=3)
weibprobplot(Surv(time))
survreg.distributions$lognormal
# General probability plot for any derived distribution available in survreg,
# using functions available in survreg.distributions (see its help page for details)
probabilityplot <- function(data,dist="weibull", n=20, level=.05) {
  Rhat <- survfit(data~1)           # compute the K-M estimate of R(t)
  subset <- data[,"status"]==1      # subset of observed failures
  r <- sum(subset)                  # number of observed failures
  Rhathat <- (Rhat$surv[subset]+c(1,Rhat$surv[subset][-r]))/2  # adjusted K-M estimate at each failure time
  tfailure <- Rhat$time[subset]     # times of the observed failures
  parent <- if (is.function(dist)) dist$dist else survreg.distributions[[dist]]$dist
                                    # parent distribution (gumbel, normal or logistic)
  quantileParent <- survreg.distributions[[parent]]$quantile   # quantile function of the parent distribution
  plot(log(tfailure), quantileParent(1-Rhathat), xlab=expression(log(t[(i)])),
       ylab="parent quantile", main=paste("Probability plot for",dist))  # make the plot
  fit <- survreg(data~1, dist=dist)
  a <- -fit$coefficients/fit$scale
  b <- 1/fit$scale
  abline(a,b)
  lntt <- seq(min(log(tfailure)),max(log(tfailure)),len=n)
  yhat <- a + b*lntt
  der <- cbind(-1/fit$scale, -yhat)
  vcov <- vcov(fit)
  if (nrow(vcov)==1) vcov <- matrix(c(as.vector(vcov),0,0,0),2,2)
  se <- sqrt(diag(der %*% vcov %*% t(der)))
  for (sign in c(-1,1)) lines(lntt, yhat+sign*qnorm(level)*se, col="red")
}
par(mfrow=c(3,2))
probabilityplot(data,"loglogistic")
probabilityplot(data,"lognormal")
probabilityplot(data,"weibull")
probabilityplot(data,"exponential")
probabilityplot(data,"rayleigh")
# Threshold Weibull model
# Pike (1966) cancer data
y <- c(143, 163, 188, 188, 190, 192, 206, 209, 213, 216, 220, 227, 230, 234, 246, 265, 304, 216, 244)
delta <- rep(c(1,0),c(17,2))
data <- Surv(y,delta)
data
par(mfrow=c(2,1))
# profile likelihood for gamma (work with the log of min(y)-gamma to avoid numerical underflow)
log.ymin.gamma <- seq(5,-25,length=100)
profilegamma <- numeric(100)
for (i in seq_along(profilegamma)) {
  data.shifted <- Surv(y - min(y) + exp(log.ymin.gamma[i]), delta)
  profilegamma[i] <- logLik(survreg(data.shifted ~ 1))
}
plot(min(y)-exp(log.ymin.gamma),profilegamma,type="l",xlab="gamma",xlim=c(0,min(y)+.1))
abline(v=min(y),lty=3)
plot(log.ymin.gamma,profilegamma,type="l")
# profile log likelihood l(alpha,gamma), maximising over the scale parameter theta
gridres <- 200
alpha <- seq(.1,3,len=gridres)
gamma <- seq(.8*min(y),min(y)-.001,len=gridres)
l <- matrix(NA,gridres,gridres)
for (i in seq_along(gamma)) for (j in seq_along(alpha)) {
  scale <- (sum((y - gamma[i])^alpha[j])/length(y))^(1/alpha[j])  # mle of theta given gamma and alpha
  l[i,j] <- sum(dweibull(y-gamma[i], shape=alpha[j], scale=scale, log=TRUE))
}
contour(gamma,alpha,l,xlab="gamma", ylab="alpha",levels=seq(max(l),by=-1,len=100))
lines(c(min(y),min(y)),c(0,1),col="red")
# Bayesian inference with scale priors on the shape and scale
# and a uniform prior on the location parameter gamma.
# Reparameterize in terms of log(shape), log(scale) and log(y_min - gamma)
library(MCMCpack)
logposterior <- function(par,y) {
  gamma <- min(y) - exp(par[3])
  sum(dweibull(y-gamma,exp(par[1]),exp(par[2]),log=TRUE)) + par[3]
}
y <- rweibull(100,shape=.8,scale=10) + 1
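# Optional sanity check: the log posterior should be finite at the starting value
# used for the Metropolis chains below.
logposterior(rep(0,3), y=y)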
chain0 <- MCMCmetrop1R(logposterior, rep(0,3), y=y, V=.01*diag(3))
chain <- MCMCmetrop1R(logposterior, rep(0,3), y=y, V=var(chain0))
plot(exp(chain))
summary(exp(chain))
plot(chain)
# Survival regression
library(survival)
library(MASS)
attach(Melanoma)
Melanoma
head(Melanoma,15)
data <- Surv(time,ifelse(status==1,1,0))
plot(survfit(data~sex),col=rep(c(2,1),each=1))
legend("topright",c("males","females"),col=c(1,2),lty=1)
survdiff(data ~ sex)
sreg <- survreg(data ~ sex, dist="weibull")
summary(sreg)
exp(9.11)*gamma(.912+1)        # ET for females (sex=0)
exp(9.11 - 0.61)*gamma(.912+1) # ET for males (sex=1)
predict(sreg, newdata=data.frame(sex=0:1))
sreg0 <- survreg(data ~ 1, dist="weibull")
# likelihood ratio test
W <- 2*(logLik(sreg)-logLik(sreg0))
pchisq(W, df=1, lower.tail=FALSE)
drop1(sreg, test="Chisq")
# more covariates
sreg2 <- survreg(data ~ sex + thickness + age, dist="weibull")
summary(sreg2)
drop1(sreg2, test="Chisq")
# Example loosely based on Moore ch. 6.3
library(asaur)
data(pharmacoSmoking)
attach(pharmacoSmoking)
# levels of ageGroup4 and employment
class(employment)
employment
levels(ageGroup4)
# how to change the reference category with relevel
ageGroup4 <- relevel(ageGroup4, ref="65+")
ageGroup4 <- relevel(ageGroup4, ref="21-34")
ageGroup4
# Model A with ttr (time in days until relapse, relapse status) as response and ageGroup4 as covariate
modelA <- survreg(Surv(ttr+.5, relapse) ~ ageGroup4, dist="weibull")
summary(modelA)
# the model matrix (dummy encoding of the four levels of ageGroup4)
head(model.matrix(~ ageGroup4 + employment))
# how to change the contrasts used
contrasts(employment) <- "contr.sum"        # used by Mette in Linear Statistical Methods
contrasts(employment)
contrasts(employment) <- "contr.treatment"  # the default
contrasts(employment)
# Model B obtained by instead including employment (full-time, part-time or other) as the covariate
modelB <- survreg(Surv(ttr+.5,relapse) ~ employment, dist="weibull")
summary(modelB)
# Model C including both ageGroup4 and employment as covariates
modelC <- survreg(Surv(ttr+.5,relapse) ~ ageGroup4 + employment, dist="weibull")
summary(modelC)
# predicted lifetime for a subject in the first age group (21-34) with employment = part-time
# Testing A vs C
pchisq(2*(logLik(modelC) - logLik(modelA)), df=2, lower.tail = FALSE)
# Testing B vs C
pchisq(2*(logLik(modelC) - logLik(modelB)), df=3, lower.tail = FALSE)
# The same with anova
anova(modelA, modelC)
# The same with drop1
drop1(modelC, test="Chisq")
# AIC
extractAIC(modelA)
extractAIC(modelB)
extractAIC(modelC)
# automatic model selection by AIC using a stepwise algorithm (use with caution)
modelAll <- survreg(Surv(ttr+.5, relapse) ~ grp + gender + race + employment + yearsSmoking +
                      levelSmoking + ageGroup4 + ageGroup2 + priorAttempts + longestNoSmoke,
                    dist="weibull")
result.step <- step(modelAll)
# Probability plot version 2. The input x is a fitted survreg object, possibly with covariates
probabilityplot2 <- function(x, n=20, level=.05, ...)
{
  # first compute standardized residuals for all failures and censoring events
  sresid <- (x$y[,1] - x$linear.predictors)/x$scale
  Rhat <- survfit(Surv(sresid, x$y[,2])~1)   # K-M estimate based on the standardized residuals
  subset <- x$y[,2]==1                       # subset of units that failed
  r <- sum(subset)                           # number of observed failures
  Rhathat <- (Rhat$surv[subset]+c(1, Rhat$surv[subset][-r]))/2  # adjusted K-M estimate at each failure time
  u <- Rhat$time[subset]                     # standardized residuals for the observed failures
  parent <- if (is.function(x$dist)) x$dist$dist else survreg.distributions[[x$dist]]$dist
                                             # parent distribution (gumbel, normal or logistic)
  quantileParent <- survreg.distributions[[parent]]$quantile   # quantile function of the parent distribution
  plot(u, quantileParent(1-Rhathat), xlab="standardized residual", ylab="parent quantile",
       main=paste("Probability plot of standardized residuals for",x$dist),...)  # make the plot
  abline(a=0,b=1)
}
par(mfrow=c(2,2))
probabilityplot2(result.step,ylim=c(-4,1),xlim=c(-3,1))
probabilityplot2(modelA,ylim=c(-4,1),xlim=c(-3,1))
probabilityplot2(survreg(Surv(ttr+.5,relapse)~1,dist="weibull"),ylim=c(-4,1),xlim=c(-3,1))

## Monday March 6: Cox proportional hazards model
summary(modelA)
modelA.coxph <- coxph(Surv(ttr,relapse) ~ ageGroup4)
modelA.coxph
# Simple toy example
y <- c(5,10,40,80,120,400,600)
delta <- c(0,1,0,0,1,1,0)
x <- c(12,10,3,5,3,4,1)
# fit the model using coxph in the survival package
# Find the MLE by plotting the log likelihood for -1 < beta < 8
L <- function(beta)
  exp(10*beta)/(exp(10*beta) + exp(3*beta) + exp(5*beta) + exp(3*beta) + exp(4*beta) + exp(beta))*
  exp(3*beta)/(exp(3*beta) + exp(4*beta) + exp(beta))*
  exp(4*beta)/(exp(4*beta) + exp(beta))
l <- function(beta) log(L(beta))
curve(l, -1, 8)
fit <- optim(0, l, control=list(fnscale=-1), hessian=TRUE)
fit$par
# check the standard errors
sqrt(1/(-fit$hessian))
# plot the confidence limits
abline(h = fit$value - qchisq(.95,df=1)/2)
# verify that the regression coefficient is approximately that of the Weibull model times -alpha
confint(coxph(Surv(y,delta) ~ x))

# Thursday, March 9
# Prop. haz. property of the Weibull model
theta <- c(1,2)
alpha <- 1.5
par(mfcol=c(2,2))
curve(dweibull(x,alpha,theta[1]),0,5,ylab="density")
curve(dweibull(x,alpha,theta[2]),add=TRUE,col="red")
curve(dweibull(x,alpha,theta[1])/(1-pweibull(x,alpha,theta[1])),0,5,ylab="hazard")
curve(dweibull(x,alpha,theta[2])/(1-pweibull(x,alpha,theta[2])),add=TRUE,col="red")
# Non-prop. haz. property of the lognormal
sigma <- .2
mu <- c(0,1)
curve(dlnorm(x,mu[1],sigma),0,5,ylab="density")
curve(dlnorm(x,mu[2],sigma),add=TRUE,col="red")
curve(dlnorm(x,mu[1],sigma)/(1-plnorm(x,mu[1],sigma)),0,5,ylab="hazard")
curve(dlnorm(x,mu[2],sigma)/(1-plnorm(x,mu[2],sigma)),add=TRUE,col="red")
# Cox prop. haz. model vs Weibull regression:
# loss of power from the non-parametric part of the Cox model
set.seed(1)
x <- rnorm(10)
y <- rweibull(length(x), scale=exp(.5*x),shape=2) # true prop. haz. regression coefficient = -shape*0.5 = -1
summary(weibreg <- survreg(Surv(y) ~ x, dist="weibull"))
(coxreg <- coxph(Surv(y) ~ x))
betatilde <- -weibreg$coefficients["x"]/weibreg$scale
der <- t(c(0, -1/weibreg$scale, -betatilde))
varbetatilde <- der %*% vcov(weibreg) %*% t(der)
sqrt(varbetatilde)
# about 36% information loss for n=10
varbetatilde/vcov(coxreg)

# Monday, March 13
# Example 2: Comparing two groups: lifetimes of two batches of sodium sulphur batteries
cat("y,delta,x
164,1,0
164,1,0
218,1,0
230,1,0
263,1,0
467,1,0
538,1,0
639,1,0
669,1,0
917,1,0
1148,1,0
1678,0,0
1678,0,0
1678,0,0
76,0,1
82,0,1
210,1,1
315,1,1
385,1,1
412,1,1
491,1,1
504,1,1
522,1,1
646,0,1
678,1,1
775,1,1
884,1,1
1131,1,1
1456,1,1
1824,1,1
1827,1,1
2248,1,1
2385,1,1
3077,1,1",file="/tmp/tmp.dat")
data <- read.table("/tmp/tmp.dat",header=TRUE,sep=",")
attach(data)
par(mfrow=c(2,1))
## Separate K-M estimates
plot(survfit(Surv(y,delta)~x))
## Log-rank test
survdiff(Surv(y,delta)~x)
## Cox PH model
coxreg <- coxph(Surv(y,delta) ~ x, ties="exact") # ties = "exact"/"breslow"
coxreg
## significance of x via z-test
summary(coxreg)
## significance of x via drop1( , test="Chisq") (compare with the log-rank test)
drop1(coxreg, test="Chisq")
# Obtaining an estimate of the baseline hazard given betahat
summary(survfit(coxreg))
plot(survfit(coxreg))
# Obtaining estimates of R(t|x) = R_0(t)^exp(beta*x) for x = 0 and x = 1
plot(survfit(coxreg, newdata=data.frame(x=0:1)))
# Cox-Snell residuals, defined as
#   V_i = Zhat(y_i),
# are not directly available in R but these can be computed via the
# martingale residuals (the default), which are defined as
#   M_i = delta_i - Zhat(y_i)
# where delta_i is the censoring indicator
coxsnellresid <- delta - resid(coxreg,type="martingale")
# generalized Cox-Snell residuals: replace each censored Cox-Snell residual
# by its conditional expectation
coxsnellresid <- coxsnellresid + (1-delta)
hist(coxsnellresid)
# qq-plot (empirical vs theoretical quantiles should fall on a straight line)
plot(sort(coxsnellresid),qexp(ppoints(coxsnellresid)))
abline(a=0,b=1)
# log minus log plot to inspect the proportional hazards assumption
Rhat <- survfit(Surv(y,delta) ~ x) # this contains _two_ survival curves
# These can be subscripted to produce estimates of the cumulative hazard
# for each level of x as follows:
plot(Rhat[1]$time,log(-log(Rhat[1]$surv)), ylim=c(-3,1),xlim=c(0,3000),type="s")
points(Rhat[2]$time,log(-log(Rhat[2]$surv)),col=2,type="s")
# vs log(t) instead of vs t
plot(log(Rhat[1]$time),log(-log(Rhat[1]$surv)), ylim=c(-3,1),type="s")
points(log(Rhat[2]$time),log(-log(Rhat[2]$surv)),col=2,type="s")

# March 14: martingale, deviance, Schoenfeld and Cox-Snell residuals
# # # # # # # # # # # # # # #
# Section 7.1.1 martingale and deviance residuals
# # # # # # # # # # # # # #
library(asaur)
attach(pharmacoSmoking)
priorAttemptsT <- priorAttempts
priorAttemptsT[priorAttempts > 20] <- 20
library(survival)
result.0.coxph <- coxph(Surv(ttr, relapse) ~ 1)
rr.0 <- residuals(result.0.coxph, type="martingale")
# Figure 7.1
# First, we need the "smoothSEcurve" function (also in the Appendix)
smoothSEcurve <- function(yy, xx) {
  # use after a call to "plot"
  # fit a lowess curve and 95% confidence interval curve
  xx.list <- min(xx) + ((0:100)/100)*(max(xx) - min(xx)) # make a list of x values
  # Then fit a loess function through the points (xx, yy) at the listed values
  yy.xx <- predict(loess(yy ~ xx), se=T,
                   newdata=data.frame(xx=xx.list))
  lines(yy.xx$fit ~ xx.list, lwd=2)
  lines(yy.xx$fit - qt(0.975, yy.xx$df)*yy.xx$se.fit ~ xx.list, lty=2)
  lines(yy.xx$fit + qt(0.975, yy.xx$df)*yy.xx$se.fit ~ xx.list, lty=2)
}
# Now the plot
par(mfrow=c(3,2))
plot(rr.0 ~ age)
smoothSEcurve(rr.0, age)
title("Martingale residuals\nversus age")
logAge <- log(age)
plot(rr.0 ~ logAge)
smoothSEcurve(rr.0, logAge)
title("Martingale residuals\nversus log age")
plot(rr.0 ~ priorAttemptsT)
smoothSEcurve(rr.0, priorAttemptsT)
title("Martingale residuals versus\nprior attempts")
logPriorAttemptsT <- log(priorAttemptsT + 1)
plot(rr.0 ~ logPriorAttemptsT)
smoothSEcurve(rr.0, logPriorAttemptsT)
title("Martingale residuals versus\nlog prior attempts")
plot(rr.0 ~ longestNoSmoke)
smoothSEcurve(rr.0, longestNoSmoke)
# Note that "\n" is a "newline" indicator
title("Martingale residuals versus\nlongest period without smoking")
logLongestNoSmoke <- log(longestNoSmoke+1)
plot(rr.0 ~ logLongestNoSmoke)
smoothSEcurve(rr.0, logLongestNoSmoke)
title("Martingale residuals versus\nlog of longest period without smoking")
# # # # #
result.grp.coxph <- coxph(Surv(ttr, relapse) ~ grp)
result.step <- step(result.grp.coxph,
                    scope=list(upper=~ grp + gender + race + employment + yearsSmoking +
                                 levelSmoking + age + priorAttemptsT + logLongestNoSmoke,
                               lower=~grp))
result.step
# Figure 7.2
rr.final <- residuals(result.step, type="martingale")
par(mfrow=c(2,2))
plot(rr.final ~ age)
smoothSEcurve(rr.final, age)
title("Martingale residuals\nversus age")
plot(rr.final ~ grp)
title("Martingale residuals\nversus treatment group")
plot(rr.final ~ employment)
title("Martingale residuals\nversus employment")
par(mfrow=c(2,2))
cox.zph(result.step)
plot(cox.zph(result.step))
# # # # # # # # # # # # # # #
# Section 7.2.1 checking the proportional hazards assumption; log cumulative hazard plots
# # # # # # # # # # # # # #
attach(pancreatic2)
pfs.month <- pfs/30.25
result.surv.LA <- survfit(Surv(pfs.month) ~ stage, subset={stage == "LA"})
time.LA <- result.surv.LA$time
surv.LA <- result.surv.LA$surv
cloglog.LA <- log(-log(surv.LA))
logtime.LA <- log(time.LA)
result.surv.M <- survfit(Surv(pfs.month) ~ stage, subset={stage == "M"})
time.M <- result.surv.M$time
surv.M <- result.surv.M$surv
cloglog.M <- log(-log(surv.M))
logtime.M <- log(time.M)
# Fig 7.4
windows()
plot(cloglog.LA ~ logtime.LA, type="s", col="blue", lwd=2,
     xlab="Log time", ylab="Complementary log-log survival")
lines(cloglog.M ~ logtime.M, col="red", lwd=2, type="s")
legend("bottomright", legend=c("Locally advanced", "Metastatic"),
       col=c("blue","red"), lwd=2)
# # # # # # # # # # # # # # #
# Section 7.2.2 checking the proportional hazards assumption; Schoenfeld residuals
# # # # # # # # # # # # # #
result.coxph <- coxph(Surv(pfs.month) ~ stage)
result.sch.resid <- cox.zph(result.coxph, transform="km")
# Fig 7.5
plot(result.sch.resid)
result.sch.resid
cox.zph(result.coxph, transform="rank")
cox.zph(result.coxph, transform="identity")

# Thursday, March 23
# Accelerated life testing, Insulation data. Deterioration of insulation used for
# electric motors normally running at temperatures of 80 to 100 degrees Centigrade.
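# The Arrhenius model takes the log lifetime to be linear in the inverse absolute
# temperature, which is why 1/(Temp + 273.16) is used as the stressor below:
# in survreg's log-location-scale form, location = beta0 + beta1/(Temp + 273.16).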
library(survival)
data <- read.table("https://www.math.ntnu.no/emner/TMA4275/2017v/datasett/Insulate.txt", header=TRUE)
attach(data)
inverseTemp <- 1/(Temp + 273.16)   # inverse absolute temperature (1/kelvin)
delta <- ifelse(Censor=="F",1,0)
# Fit a Weibull regression with the inverse of the temperature in kelvin as stressor
# (the Arrhenius model)
weibreg <- survreg(Surv(FailureT,delta) ~ inverseTemp, dist="weibull")
summary(weibreg)
vcov(weibreg)
# Then compute predicted values of the median and associated 95% confidence bands
# at different temperatures (Relation Plot in MINITAB / slides 13)
tempRange <- seq(80,170,by=5)
pred <- predict(weibreg, type="uquantile", p=.5,
                newdata=data.frame(inverseTemp=1/(273.16 + tempRange)), se.fit=TRUE)
plot(tempRange,pred$fit,type="l")
lines(tempRange, pred$fit + 1.96*pred$se, lty=2)
lines(tempRange, pred$fit - 1.96*pred$se, lty=2)
# Transform to the scale of the response
plot(tempRange,exp(pred$fit),type="l")
lines(tempRange, exp(pred$fit + 1.96*pred$se), lty=2)
lines(tempRange, exp(pred$fit - 1.96*pred$se), lty=2)
# Compute the deviance "by hand"
thetahat <- exp(weibreg$linear.predictors)
alpha <- 1/weibreg$scale
y <- FailureT
D <- 2*sum((y/thetahat)^alpha - delta*(1+alpha*log(y/thetahat)))
D
# the deviance computed from the deviance residuals
res <- residuals(weibreg, type="deviance")
sum(res^2)
# Assessing the Arrhenius assumption by plotting deviance residuals against the stressor
res <- residuals(weibreg, type="deviance")
plot(res~inverseTemp)
smoothSEcurve(res, inverseTemp)
# Assessing the Weibull and Arrhenius assumptions using probability plots (as in slides 13)
Rhat <- survfit(Surv(FailureT,delta) ~ inverseTemp) # K-M estimates of R(t) for each unique temperature
plot(NA,xlim=log(range(FailureT)), ylim=c(-3,1), xlab="log(time)", ylab="log(-log R(t))")
for (i in 1:length(unique(inverseTemp))) {
  points(log(Rhat[i]$time),log(-log(Rhat[i]$surv)), type="s") # cloglog-transformed K-M vs. log time
  x <- unique(inverseTemp)[i]
  mu <- weibreg$coefficients %*% c(1,x)
  sigma <- weibreg$scale
  theta <- exp(mu)
  alpha <- 1/sigma
  abline(-alpha*log(theta),alpha)
}

# March 30
# Parametric MLE for nonhomogeneous Poisson processes (NHPPs)
# Grampus data, times in days of valve seat replacements in 41 diesel locomotives,
# from p. 119 in
# https://books.google.no/books?id=DQBqbl40EXUC&pg=PA118&lpg=PA118&dq=grampus+data+recurrent+events&source=bl&ots=zNXVcwdZYv&sig=TtQXZYxef4FSb3QZN-cT_uT8xT8&hl=no&sa=X&ved=0ahUKEwjdjcic1PvSAhVCXiwKHR59BwMQ6AEIIDAA#v=onepage&q=grampus%20data%20recurrent%20events&f=false
# s1, s2, ... are the replacement times and tau the service time
cat(" j, s1 ,s2 ,s3 ,s4 ,tau
1 , , , , ,761
2 , , , , ,759
3 ,98 , , , ,667
4 ,326,643,653, ,667
5 , , , , ,665
6 ,84 , , , ,667
7 ,87 , , , ,663
8 ,646, , , ,653
9 , , , , ,651
10,92 , , , ,653
11,258,328,377,621,650
12,61 ,539, , ,648
13,254,276,298,640,644
14,76 ,539, , ,642
15,635, , , ,641
16,349,404,561, ,649
17, , , , ,631
18, , , , ,596
19,120,479, , ,614
20,323,447, , ,582
21,139,139, , ,589
22, , , , ,593
23,573, , , ,589
24,165,408,604, ,606
25,249, , , ,594
26,344,497, , ,613
27,265,586, , ,595
28,166,206,348, ,389
29, , , , ,601
30,410,581, , ,601
31, , , , ,611
32, , , , ,608
33, , , , ,587
34,367, , , ,603
35,202,563,570, ,585
36, , , , ,587
37, , , , ,578
38, , , , ,578
39, , , , ,586
40, , , , ,585
41, , , , ,582
", file="/tmp/temp.dat")
data <- read.table("/tmp/temp.dat",na.strings="- ",header=TRUE,sep=",")
tau <- data[,6]
s <- data[,2:5]
# Plot the data
par(mfrow=c(2,1))
plot(NA,xlim=c(0,max(tau)),ylim=c(0,length(tau)), xlab="time",ylab="unit (locomotive)")
for (j in 1:length(tau)) {
  lines(c(0,tau[j]),c(j,j))
  ss <- s[j,]
  ss <- ss[!is.na(ss)]
  points(ss,rep(j,length(ss)))
}
# List of functions specifying a power law NHPP
powerlaw <- list(
  logw=function(t,par) log(par[2]) - log(par[1]) + (par[2]-1)*log(t/par[1]),
  W=function(t,par) (t/par[1])^par[2]
)
# List of functions specifying a log linear NHPP
loglinear <- list(
  logw=function(t,par) par[1]+par[2]*t,
  W=function(t,par) {
    if (par[2]==0) exp(par[1])*t else exp(par[1])/par[2]*(exp(par[2]*t)-1)
  }
)
# negative log likelihood of data from an NHPP with log intensity function logw and
# mean function W contained in the list 'model'
negloglik <- function(par,model,s,tau) {
  tmp <- -sum(model$W(tau, par)) + sum(model$logw(s, par),na.rm=TRUE)
  -tmp
}
powerlawfit <- optim(c(.01,1), negloglik, model=powerlaw, s=s, tau=tau,
                     method="L-BFGS-B", lower=c(.001,.001), hessian=TRUE)
powerlawfit
se <- sqrt(diag(solve(powerlawfit$hessian)))
cbind(powerlawfit$par,se)
# Wald test of H_0: beta = 1 (HPP)
z <- (powerlawfit$par[2]-1)/se[2]
z
2*pnorm(abs(z),lower.tail=FALSE) # p-value
loglinfit <- optim(c(log(.001),0), negloglik, model=loglinear, s=s, tau=tau, hessian=TRUE)
se <- sqrt(diag(solve(loglinfit$hessian)))
cbind(loglinfit$par,se)
# Wald test of H_0: beta = 0 (HPP)
z <- (loglinfit$par[2])/se[2]
z
2*pnorm(abs(z),lower.tail=FALSE) # p-value
# Plot the two fitted models (intensity functions)
curve(exp(loglinear$logw(x,loglinfit$par)),to=max(tau),ylim=c(0,.005))
curve(exp(powerlaw$logw(x,powerlawfit$par)),to=max(tau),ylim=c(0,.005),add=TRUE)
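# As a rough extra check (using the functions above): a likelihood ratio version of the
# test of H_0: beta = 1 (HPP) within the power law model; the search interval for the
# HPP scale parameter theta is a guess wide enough to contain the optimum.
hppfit <- optimize(function(theta) negloglik(c(theta, 1), model=powerlaw, s=s, tau=tau),
                   interval=c(1, 1e5))
LRT <- 2*(hppfit$objective - powerlawfit$value) # 2*(maximised loglik, power law minus HPP)
LRT
pchisq(LRT, df=1, lower.tail=FALSE)             # p-value, compare with the Wald test above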