## R-code for lda analysis adapted from R-code provided in
## https://stat.ethz.ch/pipermail/r-help/2010-May/239195.html

# =========================================================================================
# (1) Generate sample labelled data   
# =========================================================================================

# needed for rmvnorm
library(mvtnorm) 
set.seed(11)

# true covariance matrix
sigma <- diag(c(0.5, 0.25))  
mu <- matrix(0,nrow=3,ncol=2)
# true mean values
mu[1,] <- c(-1,1)        
mu[2,] <- c(1,1)
mu[3,] <- c(0,-2)

n <- 300
J <- 3
x <- matrix(0, nrow = n, ncol = J)
# class labels
x[,3] <- rep(1:3, each = n/J)     
x[1  :100,1:2] <- rmvnorm(n = n/J, mean = mu[1,], sigma = sigma)   
x[101:200,1:2] <- rmvnorm(n = n/J, mean = mu[2,], sigma = sigma)
x[201:300,1:2] <- rmvnorm(n = n/J, mean = mu[3,], sigma = sigma)

colnames(x) <- c("x", "y", "class")

# =========================================================================================
# (2) Plot the labelled data   
# =========================================================================================

##
## Function for plotting the data separated by classes, hacked out of predplot:
## http://stat.ethz.ch/teaching/lectures/FS_2010/CompStat/predplot.R
##
plotclasses <- function(x, main = "", len = 200, ...) {
    xp <- seq(min(x[,1]), max(x[,1]), length=len)
    yp <- seq(min(x[,2]), max(x[,2]), length=len)
    grid <- expand.grid(xp, yp)
    colnames(grid) <- colnames(x)[-3]
    plot(x[,1],x[,2],col=c(1,2,4)[x[,3]],pch=x[,3],main=main,xlab='x',ylab='y', ...)
}

#plotclasses(x)

# =========================================================================================
# (3) Functions needed: calculate separating hyperplane between two given 
#     classes and converting hyperplanes to line equations for the p=2 case     
# =========================================================================================

## Returns the coefficients for the hyperplane that separates one class from another. 
## Computes the coefficients according to the formula: 
## $x^T\hat{\Sigma}^{-1}(\hat{\mu}_0-\hat{\mu}_1) - \frac{1}{2}(\hat{\mu}_0 + 
## \hat{\mu}_1)^T\hat{\Sigma}^{-1}(\hat{\mu}_0-\hat{\mu}_1)+\log(\frac{p_0}{p_1})$  
##
## sigmainv(DxD) - precalculated sigma (covariance matrix) inverse
## mu1(1xD) - precalculated mu mean for class 1
## mu2(1xD) - precalculated mu mean for class 2
## prior1 - precalculated prior probability for class 1
## prior2 - precalculated prior probability for class 2

ownldahyperplane <- function(sigmainv,mu1,mu2,prior1,prior2) {
    J <- nrow(mu)
    b <- sigmainv%*%(mu1 - mu2)
    c <- -(1/2)*t(mu1 + mu2)%*%sigmainv%*%(mu1 - mu2) + log(prior1/prior2) 
    return(list(b=b,c=c))
}

## Returns linear betas (intersect and slopes) for the given hyperplane structure. 
## The structure is a list that matches the output of the function defined above. 
ownlinearize <- function(sephyp) {
    # line slope and intersect
    return(list(beta0=-sephyp$c/sephyp$b[2],                                   
        beta1=-sephyp$b[1]/sephyp$b[2]))
}

# =========================================================================================
# (4) Run lda  
# =========================================================================================

# needed for lda/qda
library(MASS)

fit <- lda(x=x[,1:2],grouping=x[,3])

# extract A matrix 
A <- fit$scaling      
# calculate sigma hat inverse
sigmahatinv <- A%*%t(A) 
# get prior hat probabilities                                             
priorhat <- fit$prior 
# get mu hat
muhat <- fit$means                                                                             

# =========================================================================================
# (5) Calculate the separating hyperplanes which can be added using abline
#     or create the class boundaries using lines by fixing six points. 
#     Run the abline one at the time after running step 2 anew  
# =========================================================================================


# calculate dec. boundary 1-2
sephyp12 <- ownldahyperplane(sigmahatinv,muhat[1,],muhat[2,],     
    priorhat[1],priorhat[2])
# get line for 1-2
line12 <- ownlinearize(sephyp12)                                                       
                                               
sephyp23 <- ownldahyperplane(sigmahatinv,muhat[2,],muhat[3,], 
    priorhat[2],priorhat[3])
line23 <- ownlinearize(sephyp23)
                                 
sephyp13 <- ownldahyperplane(sigmahatinv,muhat[1,],muhat[3,],  
    priorhat[1],priorhat[3])
line13 <- ownlinearize(sephyp13) 


# ========================================================================================
# (6) Run this snippet to see the class regions 
# =========================================================================================

A= cbind(c(line13$beta1, line23$beta1), c(1,1))
b= c(line13$beta0, line23$beta0)
is12 = solve(A, b)
                                                            
plotclasses(x, xlim=c(-4,4), ylim=c(-4,4))
library(plotrix)
# abline(a=line13$beta0, b=line13$beta1)
# abline(a=line12$beta0, b=line12$beta1)
# abline(a=line23$beta0, b=line23$beta1)


ablineclip(a=line13$beta0, b=line13$beta1, x1=-4, x2=is12[1], y1=-4, y2=is12[2])
ablineclip(a=line12$beta0, b=line12$beta1, x1=-4, x2=4, y1=is12[2], y2=4)
ablineclip(a=line23$beta0, b=line23$beta1, x1=is12[1], x2=4, y1=-4, y2=is12[2])


# ========================================================================================
# Predictions of new observations
# =========================================================================================

topred <- as.data.frame(rbind(c(-2,-2),
c(-2,1),
c(0,0),
c(2,2)))

colnames(topred) <- c("x", "y")

points(x=topred[,1], y=topred[,2], col="green", pch="*",cex=3)

# use the predict function to classify the new points
predict(fit, topred)$posterior
predict(fit, topred)$class