ds <- read.table("http://data.princeton.edu/wws509/datasets/cuse.dat", 
    header = TRUE)
head(ds)
##     age education wantsMore notUsing using
## 1   <25       low       yes       53     6
## 2   <25       low        no       10     4
## 3   <25      high       yes      212    52
## 4   <25      high        no       50    10
## 5 25-29       low       yes       60    14
## 6 25-29       low        no       19    10

Problem 1: The null model - no covariates

a)

Explain fit0:

fit0 <- glm(cbind(using, notUsing) ~ 1, data = ds, family = "binomial")
fit0  # see that residual deviance is the null deviance
## 
## Call:  glm(formula = cbind(using, notUsing) ~ 1, family = "binomial", 
##     data = ds)
## 
## Coefficients:
## (Intercept)  
##     -0.7746  
## 
## Degrees of Freedom: 15 Total (i.e. Null);  15 Residual
## Null Deviance:       165.8 
## Residual Deviance: 165.8     AIC: 239.3
# the probability p can also be estimated directly from the data,
# using the closed-form MLE (see b below)

b)

Estimated coefficient and proportion using contraception: let \(N_1\) be the total number of women using contraception and \(N\) the total number of women. The log-likelihood is \[l(\beta_0)=\beta_0 N_1 - N\log(1+\exp(\beta_0))\] and its derivative (the score function) is \[ s(\beta_0)=\frac{\partial l}{\partial \beta_0} = N_1-N\frac{\exp(\beta_0)}{1+\exp(\beta_0)}\] and to find the MLE we set \(s(\hat{\beta}_0)=0\), and get

\[ \frac{\exp(\hat{\beta}_0)}{1+\exp(\hat{\beta}_0)}=\frac{N_1}{N}\] Observe that this also means that \[\hat{\pi}=\frac{N_1}{N}\] and further that \[ \hat{\beta}_0=\text{logit}(\frac{N_1}{N})\]

N <- sum(ds$using + ds$notUsing)
N1 <- sum(ds$using)
N2 <- N - N1
qlogis(N1/N)  # this is the beta0hat
## [1] -0.7745545
fit0$coefficients
## (Intercept) 
##  -0.7745545
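
As a quick numerical sanity check (a sketch using the score function derived above), the score should be essentially zero at \(\hat{\beta}_0\), and a root finder recovers the same estimate:

score <- function(beta0) N1 - N * plogis(beta0)  # plogis(x) = exp(x)/(1 + exp(x))
score(qlogis(N1/N))  # essentially zero at the MLE
uniroot(score, interval = c(-5, 5))$root  # numerical root, matches beta0hat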

c)

Estimated variance of \(\hat{\beta}_0\): \(\text{Cov}(\hat{\beta}_0)=F^{-1}(\hat{\beta}_0)\), and since it is a scalar we may call it \(\text{Var}(\hat{\beta}_0)\).

\[F(\beta_0) = \sum_{j=1}^G x_jx_j^T n_j \pi_j(1-\pi_j)=\sum_{j=1}^G n_j \pi(1-\pi)=N\pi(1-\pi)\] since \(x_j = 1\) and \(\pi_j = \pi\) for all groups in the null model. The inverse is then \[F^{-1}(\beta_0)=\frac{1}{N\pi (1-\pi)}\] and inserting \(\hat{\pi}=\frac{N_1}{N}\) gives \[\text{Var}(\hat{\beta}_0)=F^{-1}(\hat{\beta}_0)=\frac{1}{N \hat{\pi} (1-\hat{\pi})}=\frac{1}{N \frac{N_1}{N}\frac{N_2}{N}}=\frac{N}{N_1N_2}=\frac{1}{N_1}+\frac{1}{N_2}\]

d)

# Fisher-matrix:
1/N1 + 1/N2
## [1] 0.002881477
# (1x1-matrix)

vcov(fit0)  # same as we calculated
##             (Intercept)
## (Intercept) 0.002881477
# beta0hat is asymptotically normal, with estimated mean fit0$coefficients and variance vcov(fit0)
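
As a small check, the standard error reported by summary(fit0) is just the square root of this variance:

summary(fit0)$coefficients[, "Std. Error"]  # 0.05368
sqrt(1/N1 + 1/N2)  # same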

e and f)

critval <- qnorm(0.025, lower.tail = FALSE)
ci <- c(fit0$coefficients - critval * sqrt(vcov(fit0)), fit0$coefficients + 
    critval * sqrt(vcov(fit0)))

confint.default(fit0)  # assumes asymptotic gaussian distribution
##                  2.5 %     97.5 %
## (Intercept) -0.8797641 -0.6693448
confint(fit0)
##      2.5 %     97.5 % 
## -0.8804716 -0.6700014
plogis(ci)
## [1] 0.2932267 0.3386436
# does the same
fit0$family$linkinv(ci)
## [1] 0.2932267 0.3386436

Problem 2: We then study the effect of the covariate “wantsMore”

a)

fit1 <- glm(cbind(using, notUsing) ~ wantsMore, data = ds, family = binomial)
summary(fit1)  # Note that null-deviance is as fit0, but residual deviance is smaller
## 
## Call:
## glm(formula = cbind(using, notUsing) ~ wantsMore, family = binomial, 
##     data = ds)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.7091  -1.2756  -0.3467   1.4667   3.5505  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.18636    0.07971  -2.338   0.0194 *  
## wantsMoreyes -1.04863    0.11067  -9.475   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.772  on 15  degrees of freedom
## Residual deviance:  74.098  on 14  degrees of freedom
## AIC: 149.61
## 
## Number of Fisher Scoring iterations: 4
exp(fit1$coefficients[2])
## wantsMoreyes 
##    0.3504178

Parameter interpretation: if you want more children, you are less likely to use contraception (which makes sense). When you change from not wanting more children to wanting more children, the odds of using contraception are multiplied by \(\exp(\hat{\beta}_1) = 0.3504\). Remember, when \(\beta<0\) we have a decrease in the odds.
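
We can check this interpretation directly from the grouped data (a small sketch): the odds of contraceptive use among women who want more children, divided by the odds among those who do not, equal \(\exp(\hat{\beta}_1)\).

tab <- xtabs(cbind(using, notUsing) ~ wantsMore, data = ds)
odds <- tab[, "using"]/tab[, "notUsing"]  # odds of using, for each level of wantsMore
odds["yes"]/odds["no"]  # matches exp(beta1hat), about 0.3504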

b)

\(H_0\): \(\beta_1 = 0\), \(H_1\): \(\beta_1 \neq 0\)

Wald test: \(W = \left(\hat{\beta}_1/\text{SD}(\hat{\beta}_1)\right)^2\)

summary(fit1)
## 
## Call:
## glm(formula = cbind(using, notUsing) ~ wantsMore, family = binomial, 
##     data = ds)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.7091  -1.2756  -0.3467   1.4667   3.5505  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.18636    0.07971  -2.338   0.0194 *  
## wantsMoreyes -1.04863    0.11067  -9.475   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.772  on 15  degrees of freedom
## Residual deviance:  74.098  on 14  degrees of freedom
## AIC: 149.61
## 
## Number of Fisher Scoring iterations: 4
# covariance matrix
summary(fit1)$cov.scaled
##               (Intercept) wantsMoreyes
## (Intercept)   0.006354067 -0.006354067
## wantsMoreyes -0.006354067  0.012248298
vcov(fit1)
##               (Intercept) wantsMoreyes
## (Intercept)   0.006354067 -0.006354067
## wantsMoreyes -0.006354067  0.012248298
# if you want to calculate the Fisher info directly from the formula
# -- but not needed! Can be found by using vcov(fit1)
new_ds <- xtabs(formula = cbind(using, notUsing) ~ wantsMore, data = ds)
fisherlist_1 <- list(x = cbind(c(1, 1), c(0, 1)), n = cbind(new_ds[, 
    1] + new_ds[, 2]), pi = fit1$family$linkinv(cbind(c(1, 1), c(0, 1)) %*% 
    fit1$coefficients))

gr1 <- with(fisherlist_1, x[1, ] %*% t(x[1, ]) * n[1, ] * pi[1, ] * (1 - 
    pi[1, ]))
gr2 <- with(fisherlist_1, x[2, ] %*% t(x[2, ]) * n[2, ] * pi[2, ] * (1 - 
    pi[2, ]))

fisher_1 <- gr1 + gr2

covmat_1 <- solve(fisher_1)
covmat_1
##              [,1]         [,2]
## [1,]  0.006354067 -0.006354067
## [2,] -0.006354067  0.012248298
beta_1 <- fit1$coefficients[2]
sd_1 <- sqrt(covmat_1[2, 2])
wald_1 <- (beta_1/sd_1)^2
wald_1  # the squared z-statistic from the summary table
## wantsMoreyes 
##     89.77763
pchisq(wald_1, 1, lower.tail = FALSE)
## wantsMoreyes 
## 2.664901e-21
# likelihood ratio test

diff_dev <- fit0$deviance - fit1$deviance
diff_df <- 15 - 14
diff_dev
## [1] 91.6744
pchisq(diff_dev, diff_df, lower.tail = FALSE)
## [1] 1.021784e-21

c)

\(\text{LRT} = -2(l(\text{small}) - l(\text{big}))\)

loglik <- function(par, args) {
    y <- args$y
    x <- args$x
    n <- args$n
    res <- sum(y * x %*% par - n * log(1 + exp(x %*% par)))
    return(res)
}

args0 <- list(y = ds$using, n = ds$using + ds$notUsing, x = cbind(rep(1, 
    nrow(ds))))
args1 <- list(y = ds$using, n = ds$using + ds$notUsing, x = cbind(rep(1, 
    nrow(ds)), as.numeric(ds$wantsMore) - 1))

betas0 <- fit0$coefficients
betas1 <- fit1$coefficients

ll0 <- loglik(betas0, args0)
# or
loglik(matrix(c(betas0, 0), ncol = 1), args1)
## [1] -1001.847
# and
ll1 <- loglik(betas1, args1)

lrtest_1 <- -2 * (ll0 - ll1)
lrtest_1
## [1] 91.6744
diff_df <- 15 - 14  # 1 parameter in the first model, 2 in the second, and 16 observations in both
pchisq(lrtest_1, diff_df, lower.tail = FALSE)
## [1] 1.021784e-21

d)

The deviance is given by \(-2(l(\text{model}) - l(\text{saturated}))\). The difference in the deviances for two models A and B is then \(-2(l_A - l_S) - (-2(l_B - l_S)) = -2l_A + 2l_S + 2l_B - 2l_S = 2l_B - 2l_A = -2(l_A - l_B)\). In our case, fit0 is model A and fit1 is model B.

fit0$deviance - fit1$deviance
## [1] 91.6744
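
The same comparison is available through R's built-in anova method for glm objects; with test = "Chisq" it also reports the p-value:

anova(fit0, fit1, test = "Chisq")  # deviance difference 91.674, as above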

e)

The two statistics differ (Wald 89.78, LRT 91.67), but they give the same conclusion, since both far exceed the critical value 3.841. Both are asymptotically \(\chi^2\)-distributed with 1 df under \(H_0\).
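
Collecting the quantities computed above next to the critical value:

c(Wald = unname(wald_1), LRT = lrtest_1, critical = qchisq(0.95, 1))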

Problem 3: Now two covariates - deviance and model comparison

a)

No solution given (the task is to explain the content of each model before looking at how they fit).

b)

See the module pages on deviance for the definition. The degrees of freedom (df) are the number of observations (here, groups) minus the number of parameters \(\beta\) in the model. The deviance is used for assessing model fit and for model choice.
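
For grouped binomial data the deviance has the closed form \[D = 2\sum_{j=1}^G \left[ y_j \log\Big(\frac{y_j}{n_j\hat{\pi}_j}\Big) + (n_j-y_j)\log\Big(\frac{n_j-y_j}{n_j-n_j\hat{\pi}_j}\Big)\right]\] A small sketch computing this by hand for fit1 from Problem 2 (valid here since all cell counts are positive):

y <- ds$using
n <- ds$using + ds$notUsing
pihat <- fit1$fitted.values
2 * sum(y * log(y/(n * pihat)) + (n - y) * log((n - y)/(n - n * pihat)))  # 74.098
deviance(fit1)  # same value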

c)

You can compare any two nested models, i.e., models where one is a submodel of the other, using the likelihood-ratio test. They have to be nested, since the test asks whether the smaller model (the null hypothesis) is adequate compared with the larger one. The smaller model will always have a deviance larger than or equal to that of the larger model.

ds$Y <- cbind(ds$using, ds$notUsing)
models <- list(
  null     = glm(Y ~ 1, family = binomial, data = ds),
  age      = glm(Y ~ age, family = binomial, data = ds),
  desire   = glm(Y ~ wantsMore, family = binomial, data = ds),
  additive = glm(Y ~ age + wantsMore, family = binomial, data = ds),
  interact = glm(Y ~ age*wantsMore, family = binomial, data = ds)
)
models
## $null
## 
## Call:  glm(formula = Y ~ 1, family = binomial, data = ds)
## 
## Coefficients:
## (Intercept)  
##     -0.7746  
## 
## Degrees of Freedom: 15 Total (i.e. Null);  15 Residual
## Null Deviance:       165.8 
## Residual Deviance: 165.8     AIC: 239.3
## 
## $age
## 
## Call:  glm(formula = Y ~ age, family = binomial, data = ds)
## 
## Coefficients:
## (Intercept)     age25-29     age30-39     age40-49  
##     -1.5072       0.4607       1.0483       1.4246  
## 
## Degrees of Freedom: 15 Total (i.e. Null);  12 Residual
## Null Deviance:       165.8 
## Residual Deviance: 86.58     AIC: 166.1
## 
## $desire
## 
## Call:  glm(formula = Y ~ wantsMore, family = binomial, data = ds)
## 
## Coefficients:
##  (Intercept)  wantsMoreyes  
##      -0.1864       -1.0486  
## 
## Degrees of Freedom: 15 Total (i.e. Null);  14 Residual
## Null Deviance:       165.8 
## Residual Deviance: 74.1  AIC: 149.6
## 
## $additive
## 
## Call:  glm(formula = Y ~ age + wantsMore, family = binomial, data = ds)
## 
## Coefficients:
##  (Intercept)      age25-29      age30-39      age40-49  wantsMoreyes  
##      -0.8698        0.3678        0.8078        1.0226       -0.8241  
## 
## Degrees of Freedom: 15 Total (i.e. Null);  11 Residual
## Null Deviance:       165.8 
## Residual Deviance: 36.89     AIC: 118.4
## 
## $interact
## 
## Call:  glm(formula = Y ~ age * wantsMore, family = binomial, data = ds)
## 
## Coefficients:
##           (Intercept)               age25-29               age30-39  
##               -1.4553                 0.6354                 1.5411  
##              age40-49           wantsMoreyes  age25-29:wantsMoreyes  
##                1.7643                -0.0640                -0.2672  
## age30-39:wantsMoreyes  age40-49:wantsMoreyes  
##               -1.0905                -1.3671  
## 
## Degrees of Freedom: 15 Total (i.e. Null);  8 Residual
## Null Deviance:       165.8 
## Residual Deviance: 20.1  AIC: 107.6
lapply(models,summary)
## $null
## 
## Call:
## glm(formula = Y ~ 1, family = binomial, data = ds)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.3264  -2.4619  -0.7162   2.1273   5.4591  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.77455    0.05368  -14.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.77  on 15  degrees of freedom
## Residual deviance: 165.77  on 15  degrees of freedom
## AIC: 239.28
## 
## Number of Fisher Scoring iterations: 4
## 
## 
## $age
## 
## Call:
## glm(formula = Y ~ age, family = binomial, data = ds)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.5141  -1.5019   0.3857   0.9679   3.5907  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5072     0.1303 -11.571  < 2e-16 ***
## age25-29      0.4607     0.1727   2.667  0.00765 ** 
## age30-39      1.0483     0.1544   6.788 1.14e-11 ***
## age40-49      1.4246     0.1940   7.345 2.06e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.772  on 15  degrees of freedom
## Residual deviance:  86.581  on 12  degrees of freedom
## AIC: 166.09
## 
## Number of Fisher Scoring iterations: 4
## 
## 
## $desire
## 
## Call:
## glm(formula = Y ~ wantsMore, family = binomial, data = ds)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.7091  -1.2756  -0.3467   1.4667   3.5505  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.18636    0.07971  -2.338   0.0194 *  
## wantsMoreyes -1.04863    0.11067  -9.475   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.772  on 15  degrees of freedom
## Residual deviance:  74.098  on 14  degrees of freedom
## AIC: 149.61
## 
## Number of Fisher Scoring iterations: 4
## 
## 
## $additive
## 
## Call:
## glm(formula = Y ~ age + wantsMore, family = binomial, data = ds)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7870  -1.3208  -0.3417   1.2346   2.4577  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.8698     0.1571  -5.536 3.10e-08 ***
## age25-29       0.3678     0.1754   2.097    0.036 *  
## age30-39       0.8078     0.1598   5.056 4.27e-07 ***
## age40-49       1.0226     0.2039   5.014 5.32e-07 ***
## wantsMoreyes  -0.8241     0.1171  -7.037 1.97e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.772  on 15  degrees of freedom
## Residual deviance:  36.888  on 11  degrees of freedom
## AIC: 118.4
## 
## Number of Fisher Scoring iterations: 4
## 
## 
## $interact
## 
## Call:
## glm(formula = Y ~ age * wantsMore, family = binomial, data = ds)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.67001  -0.85288   0.02621   0.72300   2.18925  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -1.4553     0.2968  -4.903 9.43e-07 ***
## age25-29                0.6354     0.3564   1.783  0.07463 .  
## age30-39                1.5412     0.3183   4.842 1.29e-06 ***
## age40-49                1.7643     0.3435   5.136 2.80e-07 ***
## wantsMoreyes           -0.0640     0.3303  -0.194  0.84637    
## age25-29:wantsMoreyes  -0.2672     0.4091  -0.653  0.51366    
## age30-39:wantsMoreyes  -1.0905     0.3733  -2.921  0.00349 ** 
## age40-49:wantsMoreyes  -1.3672     0.4834  -2.828  0.00468 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.772  on 15  degrees of freedom
## Residual deviance:  20.099  on  8  degrees of freedom
## AIC: 107.61
## 
## Number of Fisher Scoring iterations: 4
df <- data.frame(deviance = round(unlist(lapply(models, deviance)), 2),
                 df = unlist(lapply(models, df.residual)),
                 aic = round(unlist(lapply(models, AIC))))

# comparing the model with age + wantsMore (small model) and
# age + wantsMore + age:wantsMore (large model).
# we can use deviances instead of log-likelihoods: the LRT statistic is then
# dev(small) - dev(large), with the factor -2 already built into the deviance.
# under H0 the statistic is chisq with df = difference in number of parameters,
# which is the same as the difference in residual degrees of freedom

dev_small <- df$deviance[4]
dev_large <- df$deviance[5]

df_small <- df$df[4]
df_large <- df$df[5]

lrt <- dev_small - dev_large
df_lrt <- df_small - df_large # 3
qchisq(0.95, df_lrt) # critical value at 5 %
## [1] 7.814728
lrt # much larger than critical value
## [1] 16.79
pchisq(lrt, df_lrt, lower.tail = FALSE) # better with interaction
## [1] 0.0007806146
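
The whole nested chain (null, desire, additive, interact) can also be tested with a single anova call, which compares each model with the previous one; the age model is left out of the chain since it is not nested with the desire model:

anova(models$null, models$desire, models$additive, models$interact, test = "Chisq")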

d)

We want low AIC, so again we choose the interaction-model.
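
For reference, the AIC values collected from the models list (the same numbers as the aic column built in c):

sapply(models, AIC)  # the interaction model has the smallest AIC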

Problem 4: Plotting

a)

fit3add <- glm(cbind(using, notUsing) ~ age + education + wantsMore, 
    data = ds, family = binomial)
summary(fit3add)
## 
## Call:
## glm(formula = cbind(using, notUsing) ~ age + education + wantsMore, 
##     family = binomial, data = ds)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5148  -0.9376   0.2408   0.9822   1.7333  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.8082     0.1590  -5.083 3.71e-07 ***
## age25-29       0.3894     0.1759   2.214  0.02681 *  
## age30-39       0.9086     0.1646   5.519 3.40e-08 ***
## age40-49       1.1892     0.2144   5.546 2.92e-08 ***
## educationlow  -0.3250     0.1240  -2.620  0.00879 ** 
## wantsMoreyes  -0.8330     0.1175  -7.091 1.33e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.772  on 15  degrees of freedom
## Residual deviance:  29.917  on 10  degrees of freedom
## AIC: 113.43
## 
## Number of Fisher Scoring iterations: 4

Rodríguez: “Additive: The deviance of 29.92 on 10 d.f. tells us that this model does not fit the data, so the assumption that logit differences by one variable are the same across categories of the other two is suspect.”

The saturated model is a model where we have all possible interactions:

summary(glm(cbind(using, notUsing) ~ age * education * wantsMore, data = ds, 
    family = binomial))
## 
## Call:
## glm(formula = cbind(using, notUsing) ~ age * education * wantsMore, 
##     family = binomial, data = ds)
## 
## Deviance Residuals: 
##  [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)                         -1.6094     0.3464  -4.646 3.38e-06
## age25-29                             0.7309     0.4152   1.760  0.07838
## age30-39                             1.7466     0.3841   4.547 5.43e-06
## age40-49                             2.5585     0.4854   5.271 1.36e-07
## educationlow                         0.6931     0.6856   1.011  0.31199
## wantsMoreyes                         0.2041     0.3794   0.538  0.59062
## age25-29:educationlow               -0.4565     0.8216  -0.556  0.57852
## age30-39:educationlow               -0.7921     0.7232  -1.095  0.27338
## age40-49:educationlow               -1.5997     0.7926  -2.018  0.04356
## age25-29:wantsMoreyes               -0.3800     0.4705  -0.808  0.41928
## age30-39:wantsMoreyes               -1.2833     0.4491  -2.858  0.00427
## age40-49:wantsMoreyes               -1.1532     0.7138  -1.615  0.10620
## educationlow:wantsMoreyes           -1.4663     0.8243  -1.779  0.07526
## age25-29:educationlow:wantsMoreyes   0.8288     0.9988   0.830  0.40666
## age30-39:educationlow:wantsMoreyes   1.2854     0.8955   1.435  0.15119
## age40-49:educationlow:wantsMoreyes   0.6093     1.1326   0.538  0.59063
##                                       
## (Intercept)                        ***
## age25-29                           .  
## age30-39                           ***
## age40-49                           ***
## educationlow                          
## wantsMoreyes                          
## age25-29:educationlow                 
## age30-39:educationlow                 
## age40-49:educationlow              *  
## age25-29:wantsMoreyes                 
## age30-39:wantsMoreyes              ** 
## age40-49:wantsMoreyes                 
## educationlow:wantsMoreyes          .  
## age25-29:educationlow:wantsMoreyes    
## age30-39:educationlow:wantsMoreyes    
## age40-49:educationlow:wantsMoreyes    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1.6577e+02  on 15  degrees of freedom
## Residual deviance: 2.8866e-15  on  0  degrees of freedom
## AIC: 103.51
## 
## Number of Fisher Scoring iterations: 3

The saturated model has no residual degrees of freedom! So the deviance is 0, but the model has no predictive power.

b)

library(ggplot2)
# note: fit3add$residuals would give the working residuals, so we ask for
# the deviance residuals explicitly
plotdf <- data.frame(dres = residuals(fit3add, type = "deviance"), 
    fitted = fit3add$fitted.values, age = ds$age)
ggplot(plotdf, aes(x = fitted, y = dres)) + geom_point() + labs(x = "Fitted values", 
    y = "Deviance residuals")

No particular trend is visible.

c)

# Same code as in the module pages
frac = ds$using/(ds$using + ds$notUsing)
logitdf = data.frame(lfitted = qlogis(fit3add$fitted), lfrac = qlogis(frac), 
    age = ds$age, wantsMore = ds$wantsMore, education = ds$education)
ggplot(logitdf, aes(x = age)) + geom_point(aes(y = lfrac, colour = "saturated")) + 
    geom_point(aes(y = lfitted, colour = "fitted")) + facet_wrap(facets = ~wantsMore * 
    education) + labs(x = "age", y = "")