#  This file  contains a mixture of gaussians analysis of 
#  the Checker data from Will Welch's notes Chapt. 1
#      
#  Authors:
#       R.W. Oldford, 2004
#
#


# just a handy function to capture the pathname for the files we will load.
web441 <- function(x)
         {paste('http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/',
                 x,
                 sep='')
         }
# get code for creating densities  
# (in class we just loaded this file since the 
#  machine wasn't connected to the internet).
#
# Get the data:
source(web441('checker.R'))

# Get the multivariate normal density function:
source(web441('dmvnorm.R'))

# plot it
x1.range <- range(checker.train[,"x1"])
x2.range <- range(checker.train[,"x2"])


# The following sets up the grid of points on which the densities will
# be evaluated.
x1.grid <-  rep(seq(x1.range[1],x1.range[2],l=40),48)
x2.grid <- rep(seq(x2.range[1],x2.range[2],l=48),rep(40,48))
x.grid <- cbind(x1.grid,x2.grid)

# Here are the grid points
quartz()
plot(x.grid)

# Now plot the data
#

plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0",
        col="red", cex=1.2)
points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+",
        col="blue", cex=1.2)

#  Separate and model the two groups, each as a mixture of two gaussians
#
# Group 0 points
group0 <- checker.train[checker.train[,3]==0,1:2]

# Group 1 points
group1 <- checker.train[checker.train[,3]==1,1:2]

# Now we will divide each of these groups (by eye) into two sub groups
#
# group 0 subgroup a

group0a <- group0[(group0[,1] < 4 & group0[,2] < 5),]
# group 0 subgroup b

group0b <- group0[(group0[,1] >= 4 | group0[,2] >= 5),]


#
# group 1 subgroup a

group1a <- group1[group1[,1] < 4,]
# group 2 subgroup b

group1b <- group1[group1[,1] >= 4 ,]

#
# Now we fit the mixture model
#

# Group 0
#
# mixing probability estimates
n0a <- nrow(group0a)
n0b <- nrow(group0b)
pi0a <- n0a /(n0a + n0b)
pi0b <- 1 - pi0a

# mean and covariance parameter estimates
mu0a <- mean(group0a)
Sigma0a <- var(group0a)
mu0b <- mean(group0b)
Sigma0b <- var(group0b)

# The mixture density (calculated on the grid points)
den0a <- dmvnorm(x.grid,mu0a,Sigma0a)
den0b <- dmvnorm(x.grid,mu0b,Sigma0b)
den0 <- pi0a * den0a + pi0b * den0b
#

# Plot its contours (6 of them)

# The following ensures that the drawing occurs as if the existing plot
# were `new'.  Otherwise the existing plot would be wiped clean before
# adding the rest.
par(new=T)
contour(unique(x1.grid),unique(x2.grid),matrix(den0,40,48),nlevels=6,col='red')

# Now for
# Group 1
#
# mixing probability estimates
n1a <- nrow(group1a)
n1b <- nrow(group1b)
pi1a <- n1a /(n1a + n1b)
pi1b <- 1 - pi1a

# mean and covariance parameter estimates
mu1a <- mean(group1a)
Sigma1a <- var(group1a)
mu1b <- mean(group1b)
Sigma1b <- var(group1b)


# The mixture density (calculated on the grid points)

source(web441('dmvnorm.R'))
den1a <- dmvnorm(x.grid,mu1a,Sigma1a)
den1b <- dmvnorm(x.grid,mu1b,Sigma1b)
den1 <- pi1a * den1a + pi1b * den1b
# And its contours are
par(new=T)
contour(unique(x1.grid),unique(x2.grid),matrix(den1,40,48),nlevels=6,col='blue')


# 
#  Lets look at the data again and at the discriminant function
#
quartz()
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0",
        col="red", cex=1.2)
points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+",
        col="blue", cex=1.2)

#
#  The likelihood ratio discriminant function
#

par(new=T)
contour(unique(x1.grid),unique(x2.grid),levels=c(log(1/10), 0, log(10)), 
  matrix(log(den0)-log(den1),40,48),nlevels=1,lwd=3,col='black')

#
# Can easily get a mis/classification table for the fit
#
# first define a handy function
#

denmix <- function(x, p, mu1, mu2,Sigma1, Sigma2) 
          { p * dmvnorm(x,mu1,Sigma1) + (1-p) * dmvnorm(x,mu2,Sigma2) }

# On the training data:
table(checker.train[,"y"], 
      (log(denmix(checker.train[,1:2], pi1a, mu1a, mu1b, Sigma1a, Sigma1b)) 
      - log(denmix(checker.train[,1:2], pi0a, mu0a, mu0b, Sigma0a, Sigma0b))) > 0)
	

# On new test data

plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.test[checker.test[,"y"] == 0, c("x1", "x2")], pch="0",
        col="red", cex=1.2)
points(checker.test[checker.test[,"y"] == 1, c("x1", "x2")], pch="+",
        col="blue", cex=1.2)

#
#  The likelihood ratio discriminant function
#

par(new=T)
contour(unique(x1.grid),unique(x2.grid),levels=c(0), 
  matrix(log(den0)-log(den1),40,48),nlevels=1,lwd=3,col='black')

# Can easily get a mis/classification table for the fit
table(checker.test[,"y"],
      (log(denmix(checker.test[,1:2], pi1a, mu1a, mu1b, Sigma1a, Sigma1b)) 
      - log(denmix(checker.test[,1:2], pi0a, mu0a, mu0b, Sigma0a, Sigma0b))) > 0)