August 18: Introductory binary regression example

#Beetle data example: Fraction dying as function of increasing insecticide dosage.
library(investr)
beetle

##    ldose  n  y
## 1 1.6907 59  6
## 2 1.7242 60 13
## 3 1.7552 62 18
## 4 1.7842 56 28
## 5 1.8113 63 52
## 6 1.8369 59 53
## 7 1.8610 62 61
## 8 1.8839 60 60

attach(beetle)
frac <- y/n
plot(ldose, frac, ylim=c(0,1))

# The shortcomings of linear regression:
mod <- lm(frac ~ ldose)
summary(mod)

## 
## Call:
## lm(formula = frac ~ ldose)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10816 -0.06063  0.00263  0.05119  0.12818 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.9478     0.8717  -10.27 4.99e-05 ***
## ldose         5.3249     0.4857   10.96 3.42e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08669 on 6 degrees of freedom
## Multiple R-squared:  0.9524, Adjusted R-squared:  0.9445 
## F-statistic: 120.2 on 1 and 6 DF,  p-value: 3.422e-05

abline(mod, col="red")

# Glm binomial response
mod2 <- glm(cbind(y,n-y) ~ ldose, family=binomial)
summary(mod2)

## 
## Call:
## glm(formula = cbind(y, n - y) ~ ldose, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5941  -0.3944   0.8329   1.2592   1.5940  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -60.717      5.181  -11.72   <2e-16 ***
## ldose         34.270      2.912   11.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 284.202  on 7  degrees of freedom
## Residual deviance:  11.232  on 6  degrees of freedom
## AIC: 41.43
## 
## Number of Fisher Scoring iterations: 4

beta <- coef(mod2)
curve(1/(1+exp(-(beta[1] + beta[2]*x))), add=TRUE, col="darkgreen")

# Parameters of glm are estimated by maximum likelihood, alternative 
# methods using optim in R (not necessarily computationally effective)
l <- function(beta, data) {
  y <- data$y
  n <- data$n
  x <- data$ldose
  eta <- beta[1] + beta[2]*x
  p <- 1/(1+exp(-eta))
  sum(dbinom(y, size=n, prob=p, log=TRUE))
}
optim(c(0,0), l, control=list(fnscale=-1), data=beetle)

## $par
## [1] -60.72168  34.27267
## 
## $value
## [1] -18.71514
## 
## $counts
## function gradient 
##       93       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

detach(beetle)

August 25: Distributions associated with the linear model (the chi-square, t- and F-distribution)

library(gamlss.data)

## 
## Attaching package: 'gamlss.data'

## The following object is masked from 'package:datasets':
## 
##     sleep

head(rent99)

##       rent   rentsqm area yearc location bath kitchen cheating district
## 1 109.9487  4.228797   26  1918        2    0       0        0      916
## 2 243.2820  8.688646   28  1918        2    0       0        1      813
## 3 261.6410  8.721369   30  1918        1    0       0        1      611
## 4 106.4103  3.547009   30  1918        2    0       0        0     2025
## 5 133.3846  4.446154   30  1918        2    0       0        1      561
## 6 339.0256 11.300851   30  1918        2    0       0        1      541

summary(rent99)

##       rent            rentsqm             area            yearc      location
##  Min.   :  40.51   Min.   : 0.4158   Min.   : 20.00   Min.   :1918   1:1794  
##  1st Qu.: 322.03   1st Qu.: 5.2610   1st Qu.: 51.00   1st Qu.:1939   2:1210  
##  Median : 426.97   Median : 6.9802   Median : 65.00   Median :1959   3:  78  
##  Mean   : 459.44   Mean   : 7.1113   Mean   : 67.37   Mean   :1956           
##  3rd Qu.: 559.36   3rd Qu.: 8.8408   3rd Qu.: 81.00   3rd Qu.:1972           
##  Max.   :1843.38   Max.   :17.7216   Max.   :160.00   Max.   :1997           
##  bath     kitchen  cheating    district   
##  0:2891   0:2951   0: 321   Min.   : 113  
##  1: 191   1: 131   1:2761   1st Qu.: 561  
##                             Median :1025  
##                             Mean   :1170  
##                             3rd Qu.:1714  
##                             Max.   :2529

attach(rent99)

## The following object is masked from package:gamlss.data:
## 
##     rent

fit1 <- lm(rentsqm ~ area + yearc + location + bath + kitchen)
summary(fit1)

## 
## Call:
## lm(formula = rentsqm ~ area + yearc + location + bath + kitchen)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5669 -1.4567 -0.0971  1.3971  9.1379 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -60.918362   3.524340 -17.285  < 2e-16 ***
## area         -0.031776   0.001697 -18.729  < 2e-16 ***
## yearc         0.035647   0.001785  19.975  < 2e-16 ***
## location2     0.730451   0.079084   9.236  < 2e-16 ***
## location3     1.747705   0.243093   7.189 8.13e-13 ***
## bath1         0.870692   0.162052   5.373 8.33e-08 ***
## kitchen1      1.166856   0.188532   6.189 6.85e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.091 on 3075 degrees of freedom
## Multiple R-squared:  0.2643, Adjusted R-squared:  0.2629 
## F-statistic: 184.1 on 6 and 3075 DF,  p-value: < 2.2e-16

levels(rent99$location)

## [1] "1" "2" "3"

head(model.matrix(~location, data=rent99))

##   (Intercept) location2 location3
## 1           1         1         0
## 2           1         1         0
## 3           1         0         0
## 4           1         1         0
## 5           1         1         0
## 6           1         1         0

fit0 <- lm(rentsqm ~ area + yearc + bath + kitchen, data=rent99)
summary(fit0)

## 
## Call:
## lm(formula = rentsqm ~ area + yearc + bath + kitchen, data = rent99)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5989 -1.5277 -0.1305  1.4077  9.3348 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -55.867637   3.543312 -15.767  < 2e-16 ***
## area         -0.030346   0.001722 -17.621  < 2e-16 ***
## yearc         0.033184   0.001796  18.476  < 2e-16 ***
## bath1         0.915198   0.165109   5.543 3.22e-08 ***
## kitchen1      1.166344   0.192131   6.071 1.43e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.132 on 3077 degrees of freedom
## Multiple R-squared:  0.2353, Adjusted R-squared:  0.2343 
## F-statistic: 236.7 on 4 and 3077 DF,  p-value: < 2.2e-16

anova(fit1,fit0)

## Analysis of Variance Table
## 
## Model 1: rentsqm ~ area + yearc + location + bath + kitchen
## Model 2: rentsqm ~ area + yearc + bath + kitchen
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1   3075 13451                                  
## 2   3077 13981 -2   -530.06 60.589 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

detach(rent99)

August 31: Treatment contrasts, interactions between categorical and numerical covariates

library(ISwR)
?hellung
head(hellung)

##   glucose   conc diameter
## 1       1 631000     21.2
## 2       1 592000     21.5
## 3       1 563000     21.3
## 4       1 475000     21.0
## 5       1 461000     21.5
## 6       1 416000     21.3

attach(hellung)
glucose <- factor(glucose)
plot(conc,diameter,col=glucose)

plot(log(conc),log(diameter),col=glucose)

glucose

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2
## [39] 2 2 2 2 2 2 2 2 2 2 2 2 2
## Levels: 1 2

mod <- lm(log(diameter) ~ log(conc) + glucose + log(conc):glucose)
summary(mod)

## 
## Call:
## lm(formula = log(diameter) ~ log(conc) + glucose + log(conc):glucose)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.061530 -0.011254  0.000129  0.008675  0.040543 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.756307   0.031957 117.543   <2e-16 ***
## log(conc)          -0.053196   0.002807 -18.954   <2e-16 ***
## glucose2            0.007869   0.054559   0.144    0.886    
## log(conc):glucose2 -0.006480   0.004821  -1.344    0.185    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02086 on 47 degrees of freedom
## Multiple R-squared:  0.9361, Adjusted R-squared:  0.9321 
## F-statistic: 229.6 on 3 and 47 DF,  p-value: < 2.2e-16

model.matrix( ~ log(conc) + glucose + log(conc):glucose)

##    (Intercept) log(conc) glucose2 log(conc):glucose2
## 1            1 13.355061        0           0.000000
## 2            1 13.291262        0           0.000000
## 3            1 13.241035        0           0.000000
## 4            1 13.071070        0           0.000000
## 5            1 13.041153        0           0.000000
## 6            1 12.938441        0           0.000000
## 7            1 12.860999        0           0.000000
## 8            1 12.679196        0           0.000000
## 9            1 12.618182        0           0.000000
## 10           1 12.201060        0           0.000000
## 11           1 12.180755        0           0.000000
## 12           1 12.013701        0           0.000000
## 13           1 11.827736        0           0.000000
## 14           1 11.775290        0           0.000000
## 15           1 11.407565        0           0.000000
## 16           1 11.097410        0           0.000000
## 17           1 10.915088        0           0.000000
## 18           1 10.858999        0           0.000000
## 19           1 10.736397        0           0.000000
## 20           1 10.621327        0           0.000000
## 21           1 10.545341        0           0.000000
## 22           1 10.434116        0           0.000000
## 23           1 10.373491        0           0.000000
## 24           1 10.308953        0           0.000000
## 25           1 10.239960        0           0.000000
## 26           1  9.952278        0           0.000000
## 27           1  9.903488        0           0.000000
## 28           1  9.680344        0           0.000000
## 29           1  9.581904        0           0.000000
## 30           1  9.510445        0           0.000000
## 31           1  9.358760        0           0.000000
## 32           1  9.305651        0           0.000000
## 33           1 13.353475        1          13.353475
## 34           1 13.124361        1          13.124361
## 35           1 12.712890        1          12.712890
## 36           1 12.560244        1          12.560244
## 37           1 12.211060        1          12.211060
## 38           1 12.072541        1          12.072541
## 39           1 11.767568        1          11.767568
## 40           1 11.617285        1          11.617285
## 41           1 11.264464        1          11.264464
## 42           1 11.156251        1          11.156251
## 43           1 11.141862        1          11.141862
## 44           1 11.034890        1          11.034890
## 45           1 10.463103        1          10.463103
## 46           1 10.203592        1          10.203592
## 47           1 10.085809        1          10.085809
## 48           1  9.998798        1           9.998798
## 49           1  9.546813        1           9.546813
## 50           1  9.472705        1           9.472705
## 51           1  9.305651        1           9.305651
## attr(,"assign")
## [1] 0 1 2 3
## attr(,"contrasts")
## attr(,"contrasts")$glucose
## [1] "contr.treatment"

# Interactions needs to be removed first!
drop1(mod, test="F")

## Single term deletions
## 
## Model:
## log(diameter) ~ log(conc) + glucose + log(conc):glucose
##                   Df  Sum of Sq      RSS     AIC F value Pr(>F)
## <none>                          0.020448 -390.91               
## log(conc):glucose  1 0.00078615 0.021234 -390.98   1.807 0.1853

mod0 <- lm(log(diameter) ~ log(conc) + glucose)
summary(mod0)

## 
## Call:
## lm(formula = log(diameter) ~ log(conc) + glucose)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.058123 -0.013201 -0.000449  0.011270  0.043550 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.781150   0.026290  143.83  < 2e-16 ***
## log(conc)   -0.055393   0.002301  -24.07  < 2e-16 ***
## glucose2    -0.065020   0.006095  -10.67 2.93e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02103 on 48 degrees of freedom
## Multiple R-squared:  0.9337, Adjusted R-squared:  0.9309 
## F-statistic: 337.9 on 2 and 48 DF,  p-value: < 2.2e-16

drop1(mod0, test="F")

## Single term deletions
## 
## Model:
## log(diameter) ~ log(conc) + glucose
##           Df Sum of Sq      RSS     AIC F value    Pr(>F)    
## <none>                 0.021234 -390.98                      
## log(conc)  1  0.256353 0.277587 -261.89  579.49 < 2.2e-16 ***
## glucose    1  0.050335 0.071569 -331.01  113.78 2.932e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Rerun and notice difference with
contrasts(glucose) <- "contr.sum"
contrasts(glucose) <- "contr.treatment"
detach(hellung)

Binary regression with probit link

library(ISwR)
data(juul)
juul$menarche <- factor(juul$menarche, labels=c("No","Yes"))
juul.girl <- subset(juul,age>8 & age<20 & complete.cases(menarche))
attach(juul.girl)
plot(age, menarche)

mod <- glm(menarche ~ age, binomial(link="probit"))
logLik(mod)

## 'log Lik.' -98.69505 (df=2)

mod2 <- glm(menarche ~ age, binomial(link="logit"))
logLik(mod2)

## 'log Lik.' -100.3321 (df=2)

summary(mod)

## 
## Call:
## glm(formula = menarche ~ age, family = binomial(link = "probit"))
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.32986  -0.15223   0.00028   0.07228   2.48281  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -11.37033    1.06346  -10.69   <2e-16 ***
## age           0.86233    0.08106   10.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 719.39  on 518  degrees of freedom
## Residual deviance: 197.39  on 517  degrees of freedom
## AIC: 201.39
## 
## Number of Fisher Scoring iterations: 8

detach(juul.girl)

Sept. 15: Hypothesis testing (Wald vs LR-tests) of linear hypotheses based and models selection via AIC on example from Exam 2018.

Aim: Is there an elevated risk of lung cancer in Fredericia caused by the petrochemical factory?

library(ISwR)
data("eba1977")
eba1977

##          city   age  pop cases
## 1  Fredericia 40-54 3059    11
## 2     Horsens 40-54 2879    13
## 3     Kolding 40-54 3142     4
## 4       Vejle 40-54 2520     5
## 5  Fredericia 55-59  800    11
## 6     Horsens 55-59 1083     6
## 7     Kolding 55-59 1050     8
## 8       Vejle 55-59  878     7
## 9  Fredericia 60-64  710    11
## 10    Horsens 60-64  923    15
## 11    Kolding 60-64  895     7
## 12      Vejle 60-64  839    10
## 13 Fredericia 65-69  581    10
## 14    Horsens 65-69  834    10
## 15    Kolding 65-69  702    11
## 16      Vejle 65-69  631    14
## 17 Fredericia 70-74  509    11
## 18    Horsens 70-74  634    12
## 19    Kolding 70-74  535     9
## 20      Vejle 70-74  539     8
## 21 Fredericia   75+  605    10
## 22    Horsens   75+  782     2
## 23    Kolding   75+  659    12
## 24      Vejle   75+  619     7

attach(eba1977)
levels(city)

## [1] "Fredericia" "Horsens"    "Kolding"    "Vejle"

mod11 <- glm(cbind(cases, pop-cases) ~ age + city, binomial) # 137.75
summary(mod11)

## 
## Call:
## glm(formula = cbind(cases, pop - cases) ~ age + city, family = binomial)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.64532  -0.67472  -0.03449   0.37480   1.85912  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.6262     0.2008 -28.021  < 2e-16 ***
## age55-59      1.1070     0.2490   4.445 8.77e-06 ***
## age60-64      1.5291     0.2325   6.577 4.81e-11 ***
## age65-69      1.7819     0.2305   7.732 1.06e-14 ***
## age70-74      1.8727     0.2365   7.918 2.42e-15 ***
## age75+        1.4289     0.2512   5.688 1.29e-08 ***
## cityHorsens  -0.3345     0.1827  -1.830   0.0672 .  
## cityKolding  -0.3764     0.1890  -1.991   0.0465 *  
## cityVejle    -0.2760     0.1891  -1.459   0.1444    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 130.999  on 23  degrees of freedom
## Residual deviance:  23.638  on 15  degrees of freedom
## AIC: 137.74
## 
## Number of Fisher Scoring iterations: 5

# Selection via hypothesis testing (drop1, anova, add1,...)
drop1(mod11, test="LRT") # drop1 suggest removing insignificant city effect

## Single term deletions
## 
## Model:
## cbind(cases, pop - cases) ~ age + city
##        Df Deviance    AIC     LRT Pr(>Chi)    
## <none>      23.638 137.74                     
## age     5  127.577 231.68 103.939   <2e-16 ***
## city    3   28.559 136.66   4.921   0.1776    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

mod10 <- glm(cbind(cases, pop-cases) ~ age , binomial) # 137.75
summary(mod10)

## 
## Call:
## glm(formula = cbind(cases, pop - cases) ~ age, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8634  -0.6463  -0.1072   0.7877   1.5494  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.8594     0.1743 -33.612  < 2e-16 ***
## age55-59      1.0879     0.2488   4.373 1.23e-05 ***
## age60-64      1.5117     0.2323   6.509 7.59e-11 ***
## age65-69      1.7639     0.2302   7.663 1.81e-14 ***
## age70-74      1.8626     0.2363   7.882 3.23e-15 ***
## age75+        1.4171     0.2510   5.645 1.65e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 130.999  on 23  degrees of freedom
## Residual deviance:  28.559  on 18  degrees of freedom
## AIC: 136.66
## 
## Number of Fisher Scoring iterations: 4

drop1(mod10, test="LRT") # but age should be retained in the model

## Single term deletions
## 
## Model:
## cbind(cases, pop - cases) ~ age
##        Df Deviance    AIC    LRT  Pr(>Chi)    
## <none>      28.559 136.66                     
## age     5  130.999 229.10 102.44 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# model with onlyl city included
mod01 <- glm(cbind(cases, pop-cases) ~ city , binomial) 
drop1(mod01, test="LRT") # then city is even more insigificant

## Single term deletions
## 
## Model:
## cbind(cases, pop - cases) ~ city
##        Df Deviance    AIC    LRT Pr(>Chi)
## <none>      127.58 231.68                
## city    3   131.00 229.10 3.4224    0.331

mod00 <- glm(cbind(cases, pop-cases) ~ 1 , binomial) 

# Model selection via AIC (automatic via step())
AIC(mod11)

## [1] 137.7381

AIC(mod01)

## [1] 231.6771

AIC(mod10) # has the smallest AIC value

## [1] 136.6595

AIC(mod00)

## [1] 229.0995

# Testing the linear hypothesis that there is no difference between Horsens, Kolding and Vejle
# Wald test
betahat <- coef(mod11)
betahat

## (Intercept)    age55-59    age60-64    age65-69    age70-74      age75+ 
##  -5.6262484   1.1069851   1.5290827   1.7819234   1.8727215   1.4288762 
## cityHorsens cityKolding   cityVejle 
##  -0.3344684  -0.3763612  -0.2760336

d <- c(0,0)
C <- rbind(c(0,0,0,0,0,0,1,-1,0),
           c(0,0,0,0,0,0,0,1,-1))
wald <- t(C %*% betahat - d) %*% solve(C %*% vcov(mod11) %*% t(C)) %*% (C %*% betahat - d)
wald

##           [,1]
## [1,] 0.2561111

pchisq(wald, df=2, lower.tail=FALSE)

##           [,1]
## [1,] 0.8798045

# LRT of the linear hypothesis
# this requires fitting the model under this linear hypothesis
fredericia <- factor(city=="Fredericia")
mod11b <- glm(cbind(cases, pop-cases) ~ age + fredericia, binomial)
anova(mod11b,mod11, test="LRT") # the p-value is slightly different from the Wald test (as the tests are based on different asymptotic approximations)

## Analysis of Deviance Table
## 
## Model 1: cbind(cases, pop - cases) ~ age + fredericia
## Model 2: cbind(cases, pop - cases) ~ age + city
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1        17     23.893                     
## 2        15     23.638  2  0.25532   0.8802

summary(mod11b) # Wald test of beta_fredericia = 0 is in the summary

## 
## Call:
## glm(formula = cbind(cases, pop - cases) ~ age + fredericia, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6568  -0.6231   0.0193   0.3694   1.8494  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -5.9575     0.1812 -32.873  < 2e-16 ***
## age55-59         1.1073     0.2490   4.447 8.71e-06 ***
## age60-64         1.5308     0.2325   6.585 4.55e-11 ***
## age65-69         1.7829     0.2304   7.739 1.00e-14 ***
## age70-74         1.8751     0.2364   7.931 2.18e-15 ***
## age75+           1.4304     0.2512   5.695 1.23e-08 ***
## fredericiaTRUE   0.3300     0.1491   2.213   0.0269 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 130.999  on 23  degrees of freedom
## Residual deviance:  23.893  on 17  degrees of freedom
## AIC: 133.99
## 
## Number of Fisher Scoring iterations: 4

anova(mod11b,mod10, test="LRT") # LRT of beta_fredrecia = 0, again slight difference between LRT and Wald

## Analysis of Deviance Table
## 
## Model 1: cbind(cases, pop - cases) ~ age + fredericia
## Model 2: cbind(cases, pop - cases) ~ age
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1        17     23.893                       
## 2        18     28.559 -1  -4.6662  0.03076 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Testing the model against the saturated model:
# p-value goodness of fit 
pchisq(mod11b$deviance, df=17, lower.tail=FALSE)

## [1] 0.1223693

detach(eba1977)

Sept. 22: Toy example showing Fisher scoring outperforming Newton-Raphson for Poisson regression with identity link

n <- 3
x <- c(-1,0,1)
y <- c(2,3,0) # y_3=0 causes the MLEs to lie on the boundary of the parameter space
plot(x,y)
mod <- glm(y~x, poisson(link="identity"))
betahat <- coef(mod)
abline(mod)

plot(NA,xlim=c(-3,3),ylim=c(-1,3),xlab="beta1",ylab="beta0")
for (i in 1:n) abline(a=0, b=-x[i])
points(betahat[2],betahat[1])
l <- function(beta0,beta1,y,x) {
  res <- sum(dpois(y,beta0 + beta1*x,log=TRUE))
  if (is.na(res)) 0 else res
}
emdbook::curve3d(l(beta0,beta1,y,x),
                 varnames = c("beta1","beta0"),
                 add=TRUE,sys3d = "contour",
                 xlim=c(-3,3),ylim=c(-1,3),
                 levels=c(-10:0),n = 200)

myglm <- function(y,x,fisherscoring=TRUE,start=c(1,0),maxiter=10) {
  X <- model.matrix(~x)
  beta <- start
  i <- 0
  repeat {
    eta <- as.vector(X %*% beta) 
    lambda <- eta
    score <- apply((y/lambda - 1)*X, 2, sum)
    if (sum(score^2)<1e-5 | i==maxiter) # bad stopping criteria if MLE is on the boundary 
      break()
    if (fisherscoring) # compute expected fisher info
      Z <- sqrt(1/lambda)*X
    else # or observed observed fisher info
      Z <- sqrt(y/lambda^2)*X
    fisherinfo <-  t(Z) %*% Z
    beta <- beta + solve(fisherinfo) %*% score
    i <- i+1
    cat(beta,score, "\n")
  }
  list(beta, score)
}
myglm(y,x,start=c(1,0)) # Fisher scoring

## 1.666667 -1 2 -2 
## 1.666667 -1.4 -0.45 -0.75 
## 1.666667 -1.56 -0.5478261 -0.6521739 
## 1.666667 -1.624 -0.5801653 -0.6198347 
## 1.666667 -1.6496 -0.5922204 -0.6077796 
## 1.666667 -1.65984 -0.5969122 -0.6030878 
## 1.666667 -1.663936 -0.5987687 -0.6012313 
## 1.666667 -1.665574 -0.5995081 -0.6004919 
## 1.666667 -1.66623 -0.5998033 -0.6001967 
## 1.666667 -1.666492 -0.5999213 -0.6000787

## [[1]]
##                  [,1]
## (Intercept)  1.666667
## x           -1.666492
## 
## [[2]]
## (Intercept)           x 
##  -0.5999685  -0.6000315

myglm(y,x,fisherscoring = FALSE, start=c(1,0)) # Newton-Raphson (diverges for the same staring values)

## 1 -1 2 -2 
## 1 -3 1 -1 
## 1 -7 0.5 -0.5 
## 1 -15 0.25 -0.25 
## 1 -31 0.125 -0.125 
## 1 -63 0.0625 -0.0625 
## 1 -127 0.03125 -0.03125 
## 1 -255 0.015625 -0.015625 
## 1 -511 0.0078125 -0.0078125 
## 1 -1023 0.00390625 -0.00390625

## [[1]]
##              [,1]
## (Intercept)     1
## x           -1023
## 
## [[2]]
##  (Intercept)            x 
##  0.001953125 -0.001953125

Sept. 29: Quasilikelihood

library(ISwR)
data("eba1977")
attach(eba1977)
fredericia <- factor(city=="Fredericia")
mod11b <- glm(cbind(cases, pop-cases) ~ age + fredericia, binomial)
mod11bq <- glm(cbind(cases, pop-cases) ~ age + fredericia, quasibinomial)
summary(mod11b)

## 
## Call:
## glm(formula = cbind(cases, pop - cases) ~ age + fredericia, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6568  -0.6231   0.0193   0.3694   1.8494  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -5.9575     0.1812 -32.873  < 2e-16 ***
## age55-59         1.1073     0.2490   4.447 8.71e-06 ***
## age60-64         1.5308     0.2325   6.585 4.55e-11 ***
## age65-69         1.7829     0.2304   7.739 1.00e-14 ***
## age70-74         1.8751     0.2364   7.931 2.18e-15 ***
## age75+           1.4304     0.2512   5.695 1.23e-08 ***
## fredericiaTRUE   0.3300     0.1491   2.213   0.0269 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 130.999  on 23  degrees of freedom
## Residual deviance:  23.893  on 17  degrees of freedom
## AIC: 133.99
## 
## Number of Fisher Scoring iterations: 4

summary(mod11bq)

## 
## Call:
## glm(formula = cbind(cases, pop - cases) ~ age + fredericia, family = quasibinomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6568  -0.6231   0.0193   0.3694   1.8494  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5.9575     0.2098 -28.399 9.12e-16 ***
## age55-59         1.1073     0.2882   3.842  0.00131 ** 
## age60-64         1.5308     0.2691   5.689 2.66e-05 ***
## age65-69         1.7829     0.2667   6.686 3.83e-06 ***
## age70-74         1.8751     0.2737   6.851 2.81e-06 ***
## age75+           1.4304     0.2907   4.920  0.00013 ***
## fredericiaTRUE   0.3300     0.1726   1.912  0.07290 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.339883)
## 
##     Null deviance: 130.999  on 23  degrees of freedom
## Residual deviance:  23.893  on 17  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4

drop1(mod11b,test="LRT") # Here we need F-tests

## Single term deletions
## 
## Model:
## cbind(cases, pop - cases) ~ age + fredericia
##            Df Deviance    AIC     LRT Pr(>Chi)    
## <none>          23.893 133.99                     
## age         5  128.183 228.28 104.290  < 2e-16 ***
## fredericia  1   28.559 136.66   4.666  0.03076 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

drop1(mod11bq, test="F")

## Single term deletions
## 
## Model:
## cbind(cases, pop - cases) ~ age + fredericia
##            Df Deviance F value    Pr(>F)    
## <none>          23.893                      
## age         5  128.183  14.841 1.095e-05 ***
## fredericia  1   28.559   3.320   0.08608 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Linear separation

# Linear separation.  This is typically encountered in situations where there are
# few observation for some levels of a catogorical covariate.  
# Minimal example: minimal example with one covariate
x <- 0:10
y <- c(0,0,0,0,1,1,1,1,1,1,1)
plot(x,y)

mod <- glm(y ~ x, binomial)

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(mod)

## 
## Call:
## glm(formula = y ~ x, family = binomial)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -2.375e-05  -2.110e-08   2.110e-08   2.110e-08   2.454e-05  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -153.73  178516.48  -0.001    0.999
## x               43.91   50261.24   0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.4421e+01  on 10  degrees of freedom
## Residual deviance: 1.1663e-09  on  9  degrees of freedom
## AIC: 4
## 
## Number of Fisher Scoring iterations: 25

drop1(mod, test="LRT")

## Single term deletions
## 
## Model:
## y ~ x
##        Df Deviance    AIC    LRT  Pr(>Chi)    
## <none>       0.000  4.000                     
## x       1   14.421 16.421 14.421 0.0001462 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# less obvious example, two covariates
x1 <- rep(1:10,2)
x2 <- rep(c(0,1),c(10,10))
y <- c(1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0)
data.frame(y,x1,x2)

##    y x1 x2
## 1  1  1  0
## 2  1  2  0
## 3  1  3  0
## 4  0  4  0
## 5  0  5  0
## 6  0  6  0
## 7  0  7  0
## 8  0  8  0
## 9  0  9  0
## 10 0 10  0
## 11 1  1  1
## 12 1  2  1
## 13 1  3  1
## 14 1  4  1
## 15 1  5  1
## 16 1  6  1
## 17 0  7  1
## 18 0  8  1
## 19 0  9  1
## 20 0 10  1

summary(glm(y ~ x1, binomial))

## 
## Call:
## glm(formula = y ~ x1, family = binomial)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.71808  -0.27120  -0.06699   0.27193   1.72056  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   6.0930     2.8436   2.143   0.0321 *
## x1           -1.2191     0.5455  -2.235   0.0254 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 27.526  on 19  degrees of freedom
## Residual deviance: 10.663  on 18  degrees of freedom
## AIC: 14.663
## 
## Number of Fisher Scoring iterations: 6

summary(glm(y ~ x2, binomial))

## 
## Call:
## glm(formula = y ~ x2, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3537  -0.8446  -0.8446   1.0108   1.5518  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.8473     0.6901  -1.228    0.220
## x2            1.2528     0.9449   1.326    0.185
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 27.526  on 19  degrees of freedom
## Residual deviance: 25.678  on 18  degrees of freedom
## AIC: 29.678
## 
## Number of Fisher Scoring iterations: 4

summary(mod <- glm(y ~ x1 + x2, binomial))

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## 
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -2.258e-05  -2.110e-08  -2.110e-08   2.110e-08   2.226e-05  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)    155.07  140546.83   0.001    0.999
## x1             -44.29   39066.66  -0.001    0.999
## x2             132.79  122010.82   0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2.7526e+01  on 19  degrees of freedom
## Residual deviance: 1.9320e-09  on 17  degrees of freedom
## AIC: 6
## 
## Number of Fisher Scoring iterations: 25

drop1(mod, test="LRT")

## Single term deletions
## 
## Model:
## y ~ x1 + x2
##        Df Deviance    AIC    LRT  Pr(>Chi)    
## <none>       0.000  6.000                     
## x1      1   25.677 29.677 25.677 4.035e-07 ***
## x2      1   10.663 14.663 10.663  0.001093 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Oct. 5: Categorical regression models

data = "http://www.stat.ufl.edu/~aa/glm/data/Alligators.dat"
ali = read.table(data, header = T)
ali

##   lake size y1 y2 y3 y4 y5
## 1    1    1 23  4  2  2  8
## 2    1    0  7  0  1  3  5
## 3    2    1  5 11  1  0  3
## 4    2    0 13  8  6  1  0
## 5    3    1  5 11  2  1  5
## 6    3    0  8  7  6  3  5
## 7    4    1 16 19  1  2  3
## 8    4    0 17  1  0  1  3

attach(ali)
library(VGAM)

## Loading required package: stats4

## Loading required package: splines

## 
## Attaching package: 'VGAM'

## The following object is masked from 'package:investr':
## 
##     calibrate

fit11 <- vglm(cbind(y2,y3,y4,y5,y1) ~ size + factor(lake), family=multinomial)
summary(fit11)

## 
## Call:
## vglm(formula = cbind(y2, y3, y4, y5, y1) ~ size + factor(lake), 
##     family = multinomial)
## 
## Pearson residuals:
##   log(mu[,1]/mu[,5]) log(mu[,2]/mu[,5]) log(mu[,3]/mu[,5]) log(mu[,4]/mu[,5])
## 1             0.0953           0.028205           -0.54130            -0.7268
## 2            -0.5082           0.003228            0.66646             1.2589
## 3            -0.3693          -0.461102           -0.42005             1.8347
## 4             0.4125           0.249983            0.19772            -1.3779
## 5            -0.5526          -0.191149            0.07215             0.2790
## 6             0.6500           0.110694           -0.02784            -0.2828
## 7             0.6757           0.827737            0.79863            -0.3081
## 8            -1.3051          -0.802694           -0.69525             0.4629
## 
## Coefficients: 
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1    -3.2074     0.6387  -5.021 5.13e-07 ***
## (Intercept):2    -2.0718     0.7067  -2.931 0.003373 ** 
## (Intercept):3    -1.3980     0.6085  -2.297 0.021601 *  
## (Intercept):4    -1.0781     0.4709  -2.289 0.022061 *  
## size:1            1.4582     0.3959   3.683 0.000231 ***
## size:2           -0.3513     0.5800  -0.606 0.544786    
## size:3           -0.6307     0.6425  -0.982 0.326296    
## size:4            0.3316     0.4482   0.740 0.459506    
## factor(lake)2:1   2.5956     0.6597   3.934 8.34e-05 ***
## factor(lake)2:2   1.2161     0.7860   1.547 0.121824    
## factor(lake)2:3  -1.3483     1.1635  -1.159 0.246529    
## factor(lake)2:4  -0.8205     0.7296  -1.125 0.260713    
## factor(lake)3:1   2.7803     0.6712   4.142 3.44e-05 ***
## factor(lake)3:2   1.6925     0.7804   2.169 0.030113 *  
## factor(lake)3:3   0.3926     0.7818   0.502 0.615487    
## factor(lake)3:4   0.6902     0.5597   1.233 0.217511    
## factor(lake)4:1   1.6584     0.6129   2.706 0.006813 ** 
## factor(lake)4:2  -1.2428     1.1854  -1.048 0.294466    
## factor(lake)4:3  -0.6951     0.7813  -0.890 0.373608    
## factor(lake)4:4  -0.8262     0.5575  -1.482 0.138378    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,5]), log(mu[,2]/mu[,5]), 
## log(mu[,3]/mu[,5]), log(mu[,4]/mu[,5])
## 
## Residual deviance: 17.0798 on 12 degrees of freedom
## 
## Log-likelihood: -47.5138 on 12 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  5  of the response

# Oct. 11: Categorical regression models, testing nested models
fit10 <- vglm(cbind(y2,y3,y4,y5,y1) ~ size, family=multinomial)
summary(fit10)

## 
## Call:
## vglm(formula = cbind(y2, y3, y4, y5, y1) ~ size, family = multinomial)
## 
## Pearson residuals:
##                       Min      1Q   Median     3Q   Max
## log(mu[,1]/mu[,5]) -3.414 -1.6814  1.18344 1.3708 1.786
## log(mu[,2]/mu[,5]) -2.183 -0.6923 -0.03712 1.0781 1.360
## log(mu[,3]/mu[,5]) -1.029 -0.7234  0.12037 0.3921 1.447
## log(mu[,4]/mu[,5]) -1.925 -0.6523  0.26477 0.9089 1.923
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  -1.0341     0.2911  -3.553 0.000381 ***
## (Intercept):2  -1.2417     0.3149  -3.944 8.03e-05 ***
## (Intercept):3  -1.7272     0.3837  -4.502 6.74e-06 ***
## (Intercept):4  -1.2417     0.3149  -3.944 8.03e-05 ***
## size:1          0.9489     0.3569   2.659 0.007837 ** 
## size:2         -0.8583     0.5350  -1.604 0.108626    
## size:3         -0.5552     0.6063  -0.916 0.359864    
## size:4          0.2943     0.4150   0.709 0.478129    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,5]), log(mu[,2]/mu[,5]), 
## log(mu[,3]/mu[,5]), log(mu[,4]/mu[,5])
## 
## Residual deviance: 66.2129 on 24 degrees of freedom
## 
## Log-likelihood: -72.0803 on 24 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Reference group is level  5  of the response

# Change in deviance is 49
deviance(fit10) - deviance(fit11)

## [1] 49.13308

# Difference in number of parameters: lake has 4 levels and we have 5 categories
# so (4-1)*(5-1) = 12 parameters.  Hence the critical values is 5.22
qchisq(.95, df=12, lower.tail=FALSE)

## [1] 5.226029

# so the lake effect is highly significant

# The same tests removing one covariate at the time
anova(fit11,test="LRT", type="III") # type = "III" terms are removed one at the time (works the same way as drop1)

## Analysis of Deviance Table (Type III tests: each term added last)
## 
## Model: 'multinomial', 'VGAMcategorical'
## 
## Link: 'multilogitlink'
## 
## Response: cbind(y2, y3, y4, y5, y1)
## 
##              Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## size          4   21.087        16     38.167 0.0003043 ***
## factor(lake) 12   49.133        24     66.213 1.983e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Goodness of fit test (p-value)
pchisq(deviance(fit11), df = (5-1)*8 - 20)

## [1] 0.8533811

# Testing for an effect of a single categorical covariate is equivalent to 
# a test of independence in a c times n contingency table
table <- cbind(y1, y2, y3, y4, y5)
table

##      y1 y2 y3 y4 y5
## [1,] 23  4  2  2  8
## [2,]  7  0  1  3  5
## [3,]  5 11  1  0  3
## [4,] 13  8  6  1  0
## [5,]  5 11  2  1  5
## [6,]  8  7  6  3  5
## [7,] 16 19  1  2  3
## [8,] 17  1  0  1  3

table <- array(table, dim=c(2,4,5)) # reorganise the data into a 4 by 2 by 5 array
table[1,,]

##      [,1] [,2] [,3] [,4] [,5]
## [1,]   23    4    2    2    8
## [2,]    5   11    1    0    3
## [3,]    5   11    2    1    5
## [4,]   16   19    1    2    3

table <- apply(table, c(1,3), sum) # for each size group and category, sum counts across lakes
table

##      [,1] [,2] [,3] [,4] [,5]
## [1,]   49   45    6    5   19
## [2,]   45   16   13    8   13

# Do a chisquare test of independence between diet and size
chisq.test(table)

## 
##  Pearson's Chi-squared test
## 
## data:  table
## X-squared = 14.772, df = 4, p-value = 0.005198

# The analogous G-test (see https://en.wikipedia.org/wiki/G-test)
G.test <- function(x) {
  n <- sum(x)
  p.rows <- apply(x,1,sum)/n
  p.cols <- apply(x,2,sum)/n
  phat <- outer(p.rows, p.cols,"*")
  G <- 2*sum(x*log((x/n)/phat)); names(G) <- "G"
  df <- (ncol(x)-1)*(nrow(x)-1); names(df) <- "df"
  structure(list(statistic=G, 
                 parameter=df, 
                 p.value=pchisq(G, df=df, lower.tail=FALSE),
                 method="G-test"
  ),class="htest")
}
G.test(table)

## 
##  G-test
## 
## data:  
## G = 15.15, df = 4, p-value = 0.004401

# The G- and chi-square tests are both analogous to the null model against a model with size included
anova(fit11,test="LRT",type="I") # type = "I" means terms are added sequentially

## Analysis of Deviance Table (Type I tests: terms added sequentially from
## first to last)
## 
## Model: 'multinomial', 'VGAMcategorical'
## 
## Link: 'multilogitlink'
## 
## Response: cbind(y2, y3, y4, y5, y1)
## 
##              Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                            28     81.362              
## size          4   15.150        24     66.213  0.004401 ** 
## factor(lake) 12   49.133        12     17.080 1.983e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Oct. 12: Ordinal regression

library(VGAM)
# Grades in TMA4240 Statistics I last three years
data <- read.csv(text="
year,  sex,        A,    B,    C,    D,    E,   F
2016,  females,   48,  144,  186,  214,  152, 123
2016,  males,    134,  254,  327,  300,  253, 205       
2017,  females,   24,   70,  133,  134,  116, 104  
2017,  males,     80,  160,  199,  184,  193, 166
2018,  females,   11,   38,   59,   65,   60,  37
2018,  males,     36,   78,   96,   98,   98,  94
", colClasses=c("factor",rep(NA,7)))
y <- data[,-(1:2)]
meangrade <- function(x) sum((1:6)*x/sum(x))
apply(y,1,meangrade) # males have slightly better grades (smaller if A=1, B=2,...)

## [1] 3.746251 3.610319 3.963855 3.761711 3.874074 3.852000

# Ordinal regression with logit link = proportional odds model
mod <- vglm(cbind(A,B,C,D,E,F) ~ year + sex, 
            family=cumulative(
              parallel = TRUE, 
              link="logitlink"),
            data=data)
summary(mod)

## 
## Call:
## vglm(formula = cbind(A, B, C, D, E, F) ~ year + sex, family = cumulative(parallel = TRUE, 
##     link = "logitlink"), data = data)
## 
## Pearson residuals:
##   logitlink(P[Y<=1]) logitlink(P[Y<=2]) logitlink(P[Y<=3]) logitlink(P[Y<=4])
## 1            -1.8914           -0.19677            -1.2098             1.8760
## 2             1.2683           -0.01618             0.0292            -0.4399
## 3            -1.5314           -2.09333             0.3521             0.8025
## 4             1.4727            1.38137             0.8357            -1.4967
## 5            -1.2394           -0.41582             0.1131             0.7109
## 6             0.4869            0.77889            -0.1122            -1.0558
##   logitlink(P[Y<=5])
## 1             0.7614
## 2            -0.4543
## 3             0.2110
## 4            -0.6228
## 5             2.0928
## 6            -1.2521
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1 -2.56091    0.07087 -36.135  < 2e-16 ***
## (Intercept):2 -1.19548    0.05477 -21.829  < 2e-16 ***
## (Intercept):3 -0.20914    0.05162  -4.052 5.09e-05 ***
## (Intercept):4  0.66881    0.05248  12.744  < 2e-16 ***
## (Intercept):5  1.70789    0.05861  29.139  < 2e-16 ***
## year2017      -0.20728    0.05765  -3.596 0.000324 ***
## year2018      -0.23610    0.07332  -3.220 0.001281 ** 
## sex  males     0.15072    0.05353   2.816 0.004865 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  5 
## 
## Names of linear predictors: logitlink(P[Y<=1]), logitlink(P[Y<=2]), 
## logitlink(P[Y<=3]), logitlink(P[Y<=4]), logitlink(P[Y<=5])
## 
## Residual deviance: 37.0393 on 22 degrees of freedom
## 
## Log-likelihood: -110.1722 on 22 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##   year2017   year2018 sex  males 
##  0.8127914  0.7897031  1.1626757

# Goodness-of-fit p-value
pchisq(deviance(mod), df.residual(mod), lower.tail=FALSE)

## [1] 0.0234223

anova(mod, type="III", test="LRT")

## Analysis of Deviance Table (Type III tests: each term added last)
## 
## Model: 'cumulative', 'VGAMordinal', 'VGAMcategorical'
## 
## Links: 'logitlink'
## 
## Response: cbind(A, B, C, D, E, F)
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## year  2  17.7799        24     54.819 0.0001378 ***
## sex   1   8.0525        23     45.092 0.0045442 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# expected counts under the fitted model
round(fitted(mod)*apply(data[,-(1:2)], 1, sum),1)

##       A     B     C     D     E     F
## 1  62.2 139.2 186.9 185.0 160.7 133.0
## 2 121.4 262.0 331.7 307.5 251.9 198.7
## 3  34.3  80.4 116.2 125.5 118.7 105.9
## 4  66.8 151.5 207.8 210.6 187.2 158.0
## 5  15.5  36.5  53.4  58.3  55.8  50.4
## 6  33.1  75.6 104.7 107.5  96.6  82.4

data[,-(1:2)]

##     A   B   C   D   E   F
## 1  48 144 186 214 152 123
## 2 134 254 327 300 253 205
## 3  24  70 133 134 116 104
## 4  80 160 199 184 193 166
## 5  11  38  59  65  60  37
## 6  36  78  96  98  98  94

mod.nonparallell <- vglm(cbind(A,B,C,D,E,F) ~ year + sex, 
            family=cumulative(
              parallel = FALSE, 
              link="logitlink"), 
            data=data)
summary(mod.nonparallell)

## 
## Call:
## vglm(formula = cbind(A, B, C, D, E, F) ~ year + sex, family = cumulative(parallel = FALSE, 
##     link = "logitlink"), data = data)
## 
## Pearson residuals:
##   logitlink(P[Y<=1]) logitlink(P[Y<=2]) logitlink(P[Y<=3]) logitlink(P[Y<=4])
## 1            0.18402            0.96502            -0.3723          -0.064270
## 2           -0.14263           -0.64201             0.2693           0.052814
## 3           -0.19837           -1.22981             0.1892           0.008626
## 4            0.17329            0.82493            -0.1269          -0.002357
## 5           -0.07778           -0.04859             0.3876           0.091981
## 6            0.02845            0.01366            -0.2778          -0.079121
##   logitlink(P[Y<=5])
## 1            -0.3515
## 2             0.2663
## 3            -0.5086
## 4             0.3868
## 5             1.3380
## 6            -0.9718
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1 -2.88140    0.12363 -23.307  < 2e-16 ***
## (Intercept):2 -1.32314    0.06904 -19.166  < 2e-16 ***
## (Intercept):3 -0.25163    0.05682  -4.428 9.49e-06 ***
## (Intercept):4  0.77604    0.05998  12.937  < 2e-16 ***
## (Intercept):5  1.83130    0.07983  22.940  < 2e-16 ***
## year2017:1    -0.18130    0.12762  -1.421 0.155406    
## year2017:2    -0.18958    0.07813  -2.427 0.015242 *  
## year2017:3    -0.16586    0.06584  -2.519 0.011765 *  
## year2017:4    -0.25467    0.06874  -3.705 0.000212 ***
## year2017:5    -0.24763    0.08955  -2.765 0.005688 ** 
## year2018:1    -0.26963    0.16857  -1.600 0.109694    
## year2018:2    -0.20505    0.10032  -2.044 0.040963 *  
## year2018:3    -0.22197    0.08412  -2.639 0.008322 ** 
## year2018:4    -0.27464    0.08673  -3.166 0.001543 ** 
## year2018:5    -0.22838    0.11289  -2.023 0.043068 *  
## sex  males:1   0.60047    0.13050   4.601 4.20e-06 ***
## sex  males:2   0.32501    0.07425   4.377 1.20e-05 ***
## sex  males:3   0.18925    0.06137   3.084 0.002044 ** 
## sex  males:4   0.01392    0.06397   0.218 0.827768    
## sex  males:5  -0.02765    0.08390  -0.330 0.741726    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  5 
## 
## Names of linear predictors: logitlink(P[Y<=1]), logitlink(P[Y<=2]), 
## logitlink(P[Y<=3]), logitlink(P[Y<=4]), logitlink(P[Y<=5])
## 
## Residual deviance: 7.6177 on 10 degrees of freedom
## 
## Log-likelihood: -95.4615 on 10 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Exponentiated coefficients:
##   year2017:1   year2017:2   year2017:3   year2017:4   year2017:5   year2018:1 
##    0.8341834    0.8273063    0.8471686    0.7751705    0.7806506    0.7636582 
##   year2018:2   year2018:3   year2018:4   year2018:5 sex  males:1 sex  males:2 
##    0.8146094    0.8009393    0.7598429    0.7958247    1.8229729    1.3840418 
## sex  males:3 sex  males:4 sex  males:5 
##    1.2083460    1.0140147    0.9727284

anova(mod, mod.nonparallell, test="LRT", type=1)

## Analysis of Deviance Table
## 
## Model 1: cbind(A, B, C, D, E, F) ~ year + sex
## Model 2: cbind(A, B, C, D, E, F) ~ year + sex
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1        22     37.039                        
## 2        10      7.618 12   29.422  0.00341 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

October 13: Linear mixed models

library(lme4)

## Loading required package: Matrix

# Sleep study data: Reaction time of different individuals (subjects) after number of days of sleep deprivation
sleepstudy

##     Reaction Days Subject
## 1   249.5600    0     308
## 2   258.7047    1     308
## 3   250.8006    2     308
## 4   321.4398    3     308
## 5   356.8519    4     308
## 6   414.6901    5     308
## 7   382.2038    6     308
## 8   290.1486    7     308
## 9   430.5853    8     308
## 10  466.3535    9     308
## 11  222.7339    0     309
## 12  205.2658    1     309
## 13  202.9778    2     309
## 14  204.7070    3     309
## 15  207.7161    4     309
## 16  215.9618    5     309
## 17  213.6303    6     309
## 18  217.7272    7     309
## 19  224.2957    8     309
## 20  237.3142    9     309
## 21  199.0539    0     310
## 22  194.3322    1     310
## 23  234.3200    2     310
## 24  232.8416    3     310
## 25  229.3074    4     310
## 26  220.4579    5     310
## 27  235.4208    6     310
## 28  255.7511    7     310
## 29  261.0125    8     310
## 30  247.5153    9     310
## 31  321.5426    0     330
## 32  300.4002    1     330
## 33  283.8565    2     330
## 34  285.1330    3     330
## 35  285.7973    4     330
## 36  297.5855    5     330
## 37  280.2396    6     330
## 38  318.2613    7     330
## 39  305.3495    8     330
## 40  354.0487    9     330
## 41  287.6079    0     331
## 42  285.0000    1     331
## 43  301.8206    2     331
## 44  320.1153    3     331
## 45  316.2773    4     331
## 46  293.3187    5     331
## 47  290.0750    6     331
## 48  334.8177    7     331
## 49  293.7469    8     331
## 50  371.5811    9     331
## 51  234.8606    0     332
## 52  242.8118    1     332
## 53  272.9613    2     332
## 54  309.7688    3     332
## 55  317.4629    4     332
## 56  309.9976    5     332
## 57  454.1619    6     332
## 58  346.8311    7     332
## 59  330.3003    8     332
## 60  253.8644    9     332
## 61  283.8424    0     333
## 62  289.5550    1     333
## 63  276.7693    2     333
## 64  299.8097    3     333
## 65  297.1710    4     333
## 66  338.1665    5     333
## 67  332.0265    6     333
## 68  348.8399    7     333
## 69  333.3600    8     333
## 70  362.0428    9     333
## 71  265.4731    0     334
## 72  276.2012    1     334
## 73  243.3647    2     334
## 74  254.6723    3     334
## 75  279.0244    4     334
## 76  284.1912    5     334
## 77  305.5248    6     334
## 78  331.5229    7     334
## 79  335.7469    8     334
## 80  377.2990    9     334
## 81  241.6083    0     335
## 82  273.9472    1     335
## 83  254.4907    2     335
## 84  270.8021    3     335
## 85  251.4519    4     335
## 86  254.6362    5     335
## 87  245.4523    6     335
## 88  235.3110    7     335
## 89  235.7541    8     335
## 90  237.2466    9     335
## 91  312.3666    0     337
## 92  313.8058    1     337
## 93  291.6112    2     337
## 94  346.1222    3     337
## 95  365.7324    4     337
## 96  391.8385    5     337
## 97  404.2601    6     337
## 98  416.6923    7     337
## 99  455.8643    8     337
## 100 458.9167    9     337
## 101 236.1032    0     349
## 102 230.3167    1     349
## 103 238.9256    2     349
## 104 254.9220    3     349
## 105 250.7103    4     349
## 106 269.7744    5     349
## 107 281.5648    6     349
## 108 308.1020    7     349
## 109 336.2806    8     349
## 110 351.6451    9     349
## 111 256.2968    0     350
## 112 243.4543    1     350
## 113 256.2046    2     350
## 114 255.5271    3     350
## 115 268.9165    4     350
## 116 329.7247    5     350
## 117 379.4445    6     350
## 118 362.9184    7     350
## 119 394.4872    8     350
## 120 389.0527    9     350
## 121 250.5265    0     351
## 122 300.0576    1     351
## 123 269.8939    2     351
## 124 280.5891    3     351
## 125 271.8274    4     351
## 126 304.6336    5     351
## 127 287.7466    6     351
## 128 266.5955    7     351
## 129 321.5418    8     351
## 130 347.5655    9     351
## 131 221.6771    0     352
## 132 298.1939    1     352
## 133 326.8785    2     352
## 134 346.8555    3     352
## 135 348.7402    4     352
## 136 352.8287    5     352
## 137 354.4266    6     352
## 138 360.4326    7     352
## 139 375.6406    8     352
## 140 388.5417    9     352
## 141 271.9235    0     369
## 142 268.4369    1     369
## 143 257.2424    2     369
## 144 277.6566    3     369
## 145 314.8222    4     369
## 146 317.2135    5     369
## 147 298.1353    6     369
## 148 348.1229    7     369
## 149 340.2800    8     369
## 150 366.5131    9     369
## 151 225.2640    0     370
## 152 234.5235    1     370
## 153 238.9008    2     370
## 154 240.4730    3     370
## 155 267.5373    4     370
## 156 344.1937    5     370
## 157 281.1481    6     370
## 158 347.5855    7     370
## 159 365.1630    8     370
## 160 372.2288    9     370
## 161 269.8804    0     371
## 162 272.4428    1     371
## 163 277.8989    2     371
## 164 281.7895    3     371
## 165 279.1705    4     371
## 166 284.5120    5     371
## 167 259.2658    6     371
## 168 304.6306    7     371
## 169 350.7807    8     371
## 170 369.4692    9     371
## 171 269.4117    0     372
## 172 273.4740    1     372
## 173 297.5968    2     372
## 174 310.6316    3     372
## 175 287.1726    4     372
## 176 329.6076    5     372
## 177 334.4818    6     372
## 178 343.2199    7     372
## 179 369.1417    8     372
## 180 364.1236    9     372

mod <- lmer(Reaction ~ 1 + Days + (1 + Days|Subject), data=sleepstudy)
summary(mod)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ 1 + Days + (1 + Days | Subject)
##    Data: sleepstudy
## 
## REML criterion at convergence: 1743.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9536 -0.4634  0.0231  0.4634  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 612.10   24.741       
##           Days         35.07    5.922   0.07
##  Residual             654.94   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.825  36.838
## Days          10.467      1.546   6.771
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138

fixef(mod)

## (Intercept)        Days 
##   251.40510    10.46729

ranef(mod)

## $Subject
##     (Intercept)        Days
## 308   2.2585509   9.1989758
## 309 -40.3987381  -8.6196806
## 310 -38.9604090  -5.4488565
## 330  23.6906196  -4.8143503
## 331  22.2603126  -3.0699116
## 332   9.0395679  -0.2721770
## 333  16.8405086  -0.2236361
## 334  -7.2326151   1.0745816
## 335  -0.3336684 -10.7521652
## 337  34.8904868   8.6282652
## 349 -25.2102286   1.1734322
## 350 -13.0700342   6.6142178
## 351   4.5778642  -3.0152621
## 352  20.8636782   3.5360011
## 369   3.2754656   0.8722149
## 370 -25.6129993   4.8224850
## 371   0.8070461  -0.9881562
## 372  12.3145921   1.2840221
## 
## with conditional variances for "Subject"

October 27: GLMMs: Difference between marginal and conditional model Simulated data

set.seed(1)
x <- rnorm(100)
group <- factor(rep(1:10,each=10))
eta <- 1*x + rnorm(10,sd=1)[group] # tau_0^2 = 1
piprob <- pnorm(eta) # true model has probit link
y <- rbinom(n=100,size=10,prob=piprob)
plot(x,y/10,col=group)

# First fit a glm without accounting for between group variation
mod0 <- glm(cbind(y,10-y) ~ x, binomial(link="probit"))
xx <- seq(-4,4,len=100)
lines(xx,predict(mod0, newdata=data.frame(x=xx), type = "response"), lty=2)

# Fit a glmm random intecept model
mod1 <- glmer(cbind(y,10-y) ~ x + (1|group), family=binomial(link="probit"))
summary(mod1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( probit )
## Formula: cbind(y, 10 - y) ~ x + (1 | group)
## 
##      AIC      BIC   logLik deviance df.resid 
##    308.9    316.8   -151.5    302.9       97 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8463 -0.5885  0.1131  0.5910  1.5310 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  group  (Intercept) 1.051    1.025   
## Number of obs: 100, groups:  group, 10
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.41400    0.32878   1.259    0.208    
## x            1.09681    0.08357  13.125   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##   (Intr)
## x 0.022

# Plot the conditional models
for (i in levels(group)) {
  lines(xx, predict(mod1, newdata=data.frame(x=xx,group=i), type = "response"), col=i)
}
# Compute adjusted (deflated) predicted response under the marginal model.
tau0hat <- summary(mod1)$varcor$group[1,1]
beta.adjusted <- fixef(mod1)/sqrt(1+tau0hat^2)
curve(pnorm(beta.adjusted[1] + beta.adjusted[2]*x), add=TRUE)

# Standard errors are only reliable (and larger) if correctly accounting for within-cluster 
# dependent observations as compared to naive glm:
summary(mod0)

## 
## Call:
## glm(formula = cbind(y, 10 - y) ~ x, family = binomial(link = "probit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3502  -1.4709   0.1024   1.7138   4.2522  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.25900    0.04294   6.031 1.63e-09 ***
## x            0.69711    0.05466  12.753  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 607.87  on 99  degrees of freedom
## Residual deviance: 420.86  on 98  degrees of freedom
## AIC: 594.14
## 
## Number of Fisher Scoring iterations: 4

summary(mod1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( probit )
## Formula: cbind(y, 10 - y) ~ x + (1 | group)
## 
##      AIC      BIC   logLik deviance df.resid 
##    308.9    316.8   -151.5    302.9       97 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8463 -0.5885  0.1131  0.5910  1.5310 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  group  (Intercept) 1.051    1.025   
## Number of obs: 100, groups:  group, 10
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.41400    0.32878   1.259    0.208    
## x            1.09681    0.08357  13.125   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##   (Intr)
## x 0.022

# Different fitting methods (note the differences):
# Adaptive Gauss-Hermite quadrature (accurate but slow and only works with scalar random effects)
glmer(cbind(y,10-y) ~ x + (1|group), family=binomial(link="probit"), nAGQ = 100)

## Generalized linear mixed model fit by maximum likelihood (Adaptive
##   Gauss-Hermite Quadrature, nAGQ = 100) [glmerMod]
##  Family: binomial  ( probit )
## Formula: cbind(y, 10 - y) ~ x + (1 | group)
##      AIC      BIC   logLik deviance df.resid 
## 139.5661 147.3816 -66.7830 133.5661       97 
## Random effects:
##  Groups Name        Std.Dev.
##  group  (Intercept) 1.022   
## Number of obs: 100, groups:  group, 10
## Fixed Effects:
## (Intercept)            x  
##      0.4126       1.0943

# Laplace approximation (by default nAGQ = 1) (usually very accurate)
glmer(cbind(y,10-y) ~ x + (1|group), family=binomial(link="probit"))

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( probit )
## Formula: cbind(y, 10 - y) ~ x + (1 | group)
##       AIC       BIC    logLik  deviance  df.resid 
##  308.9426  316.7581 -151.4713  302.9426        97 
## Random effects:
##  Groups Name        Std.Dev.
##  group  (Intercept) 1.025   
## Number of obs: 100, groups:  group, 10
## Fixed Effects:
## (Intercept)            x  
##       0.414        1.097

glmmTMB::glmmTMB(cbind(y,10-y) ~ x + (1|group), family=binomial(link="probit"))

## Formula:          cbind(y, 10 - y) ~ x + (1 | group)
##       AIC       BIC    logLik  df.resid 
##  308.8660  316.6815 -151.4330        97 
## Random-effects (co)variances:
## 
## Conditional model:
##  Groups Name        Std.Dev.
##  group  (Intercept) 1.022   
## 
## Number of obs: 100 / Conditional model: group, 10
## 
## Fixed Effects:
## 
## Conditional model:
## (Intercept)            x  
##      0.4126       1.0943

# Laplace approximation of REML likelihood
glmmTMB::glmmTMB(cbind(y,10-y) ~ x + (1|group), family=binomial(link="probit"), REML=TRUE)

## Formula:          cbind(y, 10 - y) ~ x + (1 | group)
##       AIC       BIC    logLik  df.resid 
##  312.3622  320.1777 -153.1811        99 
## Random-effects (co)variances:
## 
## Conditional model:
##  Groups Name        Std.Dev.
##  group  (Intercept) 1.084   
## 
## Number of obs: 100 / Conditional model: group, 10
## 
## Fixed Effects:
## 
## Conditional model:
## (Intercept)            x  
##      0.4088       1.0896

# Penalized quasi likelihood (less accurate and doesn't give a real likelihood)
MASS::glmmPQL(cbind(y,10-y) ~ x, ~ 1|group, family=binomial(link="probit"))

## iteration 1

## iteration 2

## iteration 3

## iteration 4

## iteration 5

## Linear mixed-effects model fit by maximum likelihood
##   Data: NULL 
##   Log-likelihood: NA
##   Fixed: cbind(y, 10 - y) ~ x 
## (Intercept)           x 
##    0.407451    1.087134 
## 
## Random effects:
##  Formula: ~1 | group
##         (Intercept) Residual
## StdDev:    1.011412 0.982036
## 
## Variance function:
##  Structure: fixed weights
##  Formula: ~invwt 
## Number of Observations: 100
## Number of Groups: 10

Nov. 8: Toy implementation of automatic differentiation

As opposed to symbolic and numerical differentation. Loosely based on Wood ch. 5.5.3

Example: Suppose we want to compute the following expression an its derivatives with respect to the input x1,x2,x2

x1 <- 1; 
x2 <- 2; 
x3 <- pi/2
(x1*x2*sin(x3)+exp(x1*x2))/x3

## [1] 5.977259

# function constructing objects ("dual numbers") containing both value and 
# associated derivates w.r.t x1, x2 and x3.
ad <- function(x,grad=1) {
  attr(x,"grad") <- grad
  class(x) <- "ad"
  x
}

# Define inputs x1, x2, x3 as dual numbers
x1 <- ad(1,    c(1,0,0))
x2 <- ad(2,    c(0,1,0))
x3 <- ad(pi/2, c(0,0,1))

# convenience functions extracting value and gradient from ad objects
grad <- function(x) {
  if (class(x)=="ad") 
    attr(x,"grad")
  else # independent non-ad inputs have a gradient of 0
    0 
}
val <- function(x) as.numeric(x)

# Overloading of some elementary functions and operators (methods for "ad"-class)
"+.ad" <- function(a,b) ad(val(a) + val(b), grad(a) + grad(b))
"-.ad" <- function(a,b) ad(val(a) - val(b), grad(a) - grad(b))
"*.ad" <- function(a,b) ad(val(a)*val(b),   val(a)*grad(b) + grad(a)*val(b))
"/.ad" <- function(a,b) ad(val(a)/val(b),   (grad(a)*val(b) - grad(b)*val(a))/(val(b))^2)
sin.ad <- function(a)   ad(sin(val(a)),     cos(val(a))*grad(a))
exp.ad <- function(a)   ad(exp(val(a)),     exp(val(a))*grad(a))
log.ad <- function(a)   ad(log(val(a)),      1/val(a)*grad(a))
"^.ad" <- function(a,b) exp(b*log(a)) # reexpress in terms of exp.ad, log.ad and *.ad

# Let the magic happen!
(x1*x2*sin(x3) + exp(x1*x2))/x3

## [1] 5.977259
## attr(,"grad")
## [1] 10.681278  5.340639 -3.805241
## attr(,"class")
## [1] "ad"

# Second example comparing automatic,
f <- function(x) x^x
print(f(ad(3)), digits=15) # (x^x, d/dx x^x) for x=3

## [1] 27
## attr(,"grad")
## [1] 56.662531794039
## attr(,"class")
## [1] "ad"

# symbolic,
print(3^3*(log(3)+1), digits=15)

## [1] 56.662531794039

# and numerical differentiation (achives precision only to about 1e-6)
for (h in .1^(1:15)) { 
  print((f(3 + h) - f(3))/h, digits=15) 
}

## [1] 63.5963197890903
## [1] 57.3071800899029
## [1] 56.7265387036464
## [1] 56.6689279338206
## [1] 56.6631713628141
## [1] 56.6625957567623
## [1] 56.6625380926666
## [1] 56.6625320885805
## [1] 56.6625359965655
## [1] 56.6625502074203
## [1] 56.662585734557
## [1] 56.6693358905468
## [1] 56.6302560400799
## [1] 57.9092329644481
## [1] 49.737991503207

R code examples