#  This file contains a logistic regression analysis of
#  the Checker data from Will Welch's notes, Chapter 1.
#      
#  Authors:
#       R.W. Oldford, 2004
#
#


# A handy function to build the full URL for the files we will load.
web441 <- function(x)
         {paste('http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/',
                 x,
                 sep='')
         }
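# For example:
#   web441('checker.R')
#   # [1] "http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/checker.R"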
# (In class we just loaded this file locally, since the
#  machine wasn't connected to the internet.)
#
# Get the data:
source(web441('checker.R'))

# plot it
x1.range <- range(checker.train[,"x1"])
x2.range <- range(checker.train[,"x2"])
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0",
        col="red", cex=1.2)
points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+",
        col="blue", cex=1.2)


# Fit the simple logistic model
fit1 <- glm(y ~ x1 + x2, family ="binomial", data = checker.train)

# Look at results.  Note significance levels associated with the coefficients.
summary(fit1)
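# If you only want the coefficient table (estimates, standard errors,
# z values, p-values), it can be pulled out of the summary directly:
coef(summary(fit1))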

# We can "predict" the class outcome by producing the estimated probability
# for any (x1, x2).
# In R this is done by creating a data frame containing the new data points.
# Here we have two points: (x1, x2) = (6, 8)  for "Bob", and
#                          (x1, x2) = (7, 10) for "Mary".
# 
xnew <- data.frame(x1 = c(6, 7), x2 = c(8, 10),
                   row.names = c("Bob", "Mary"))
# Here are the predicted probabilities of belonging to class 1 (y=1, so
# the second of the two classes).
predict(fit1, newdata = xnew, type = "response")

# Depending on our rule, "Bob" and "Mary" will be classified according to
# the size of these estimated probabilities.
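# For example, with the usual 0.5 cutoff (our choice of rule here, not
# part of the fit), the hard classifications would be:
ifelse(predict(fit1, newdata = xnew, type = "response") > 0.5, 1, 0)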

# To see the regions this rule chooses, we plot contours of the predicted
# probability over a grid of points.
# Compute the fitted probabilities over a grid so that we can plot them.
xgrid <- expand.grid(x1=seq(x1.range[1], x1.range[2], length = 21),
                     x2=seq(x2.range[1], x2.range[2], length = 21))
pHat <- predict(fit1, newdata = xgrid, type="response")

# Reshape the predicted probabilities into a matrix, as required by
# contour().

pHat <- matrix(pHat, nrow = 21, ncol = 21, byrow = FALSE)
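# A quick sanity check on the reshaping (our own check, not part of the
# original analysis): expand.grid varies its first argument (x1) fastest,
# so column-major filling puts x1 along the rows.  The [1,1] entry should
# therefore be the prediction at the smallest x1 and x2:
all.equal(unname(pHat[1, 1]),
          unname(predict(fit1,
                         newdata = data.frame(x1 = x1.range[1],
                                              x2 = x2.range[1]),
                         type = "response")))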

# Add contours corresponding to odds of 10:1, 1:1, and 1:10
# (probabilities 10/11, 1/2, and 1/11; see the conversion below),
# with a different line type for the 1:1 contour.
# (NB: none of these contours appears in the range of the data!
#  Considering the summary of the fit, this is not too surprising.)
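# The odds-to-probability conversion behind those contour levels, as a
# small helper of our own: p = odds/(1 + odds).
odds2p <- function(odds) odds / (1 + odds)
odds2p(c(1/10, 1, 10))   # 1/11, 1/2, and 10/11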
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        pHat,
        levels = 0.5,
        add = TRUE,
        lty = 1, col = "brown", lwd = 2)
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        pHat,
        levels = c(1/11, 10/11),
        add = TRUE,
        lty = 2, col = "brown", lwd = 2)
# Let's look at 4 contours that do appear in the range of the data.
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        pHat,
        nlevels = 4,
        add = TRUE,
        lty = 2, col = "brown", lwd = 2)

#
# We can easily get a classification (confusion) table for the fit.
# On the training data:
table(checker.train[,"y"], fit1$fitted.values > 0.5)

# On new test data

table(checker.test[,"y"], 
      predict(fit1, newdata= checker.test, type="response") > 0.5)
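# A small convenience for the comparisons that follow (our own helper,
# not part of the course code): the overall misclassification rate of a
# logistic fit on a data set, using the 0.5 cutoff.
misclass.rate <- function(fit, data) {
  mean((predict(fit, newdata = data, type = "response") > 0.5) != data[, "y"])
}
misclass.rate(fit1, checker.test)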
#
######################
#
# Fit the logistic model with a second-order (still linear in the
# coefficients) predictor.
fit2 <- glm(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2,
            family = "binomial", data = checker.train)

# Look at results.  Note significance levels associated with the coefficients.
summary(fit2)
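# Since fit1 is nested in fit2, a deviance (likelihood ratio) test is one
# way to judge whether the extra second-order terms help:
anova(fit1, fit2, test = "Chisq")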

# As before, here are the predicted probabilities of belonging to
# class 1 (y = 1, the second of the two classes) for the new "Bob"
# and "Mary" data.
predict(fit2, newdata = xnew, type = "response")

# Depending on our rule, "Bob" and "Mary" will be classified according to
# the size of these estimated probabilities.

# To see the regions this rule chooses, we plot contours of the predicted
# probability over a grid of points.
pHat2 <- predict(fit2, newdata = xgrid, type="response")

# Reshape the predicted probabilities into a matrix, as required by
# contour().

pHat2 <- matrix(pHat2, nrow = 21, ncol = 21, byrow = FALSE)

# Add contours corresponding to odds of 10:1, 1:1, and 1:10,
# with a different line type for the 1:1 contour.
# Note that quartz() below opens a new window in the Mac R environment.
# You may need something different on your platform (e.g. X11() on Unix,
# windows() on Windows, or the portable dev.new()).
# See help("Devices") for more info.

quartz()
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0",
       col="red", cex=1.2)
points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+",
       col="blue", cex=1.2)

contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        pHat2,
        levels = 0.5,
        add = TRUE,
        lty = 1, col = "steelblue", lwd = 2)
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        pHat2,
        levels = c(1/11, 10/11),
        add = TRUE,
        lty = 2, col = "steelblue", lwd = 2)


#
# We can easily get a classification (confusion) table for the fit.
# On the training data:
table(checker.train[,"y"], fit2$fitted.values > 0.5)

# On new test data

table(checker.test[,"y"], 
      predict(fit2, newdata= checker.test, type="response") > 0.5)
######################
#
# Fit the logistic model with a third-order (still linear in the
# coefficients) predictor.
fit3 <- glm(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2 + I(x1^3) + I(x2^3),
            family = "binomial", data = checker.train)

# Look at results.  Note significance levels associated with the coefficients.
summary(fit3)
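# A quick complexity-penalized comparison of the three logistic fits:
AIC(fit1, fit2, fit3)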

# As before, here are the predicted probabilities of belonging to
# class 1 (y = 1, the second of the two classes) for the new "Bob"
# and "Mary" data.
predict(fit3, newdata = xnew, type = "response")

# Depending on our rule, "Bob" and "Mary" will be classified according to
# the size of these estimated probabilities.

# To see the regions this rule chooses, we plot contours of the predicted
# probability over a grid of points.
pHat3 <- predict(fit3, newdata = xgrid, type="response")

# Reshape the predicted probabilities into a matrix, as required by
# contour().

pHat3 <- matrix(pHat3, nrow = 21, ncol = 21, byrow = FALSE)

# Add contours corresponding to odds of 10:1, 1:1, and 1:10,
# with a different line type for the 1:1 contour.
# Note that quartz() below opens a new window in the Mac R environment.
# You may need something different on your platform (e.g. X11() on Unix,
# windows() on Windows, or the portable dev.new()).
# See help("Devices") for more info.

quartz()
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0",
       col="red", cex=1.2)
points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+",
       col="blue", cex=1.2)

contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        pHat3,
        levels = 0.5,
        add = TRUE,
        lty = 1, col = "steelblue", lwd = 2)
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        pHat3,
        levels = c(1/11, 10/11),
        add = TRUE,
        lty = 2, col = "steelblue", lwd = 2)

#
# We can easily get a classification (confusion) table for the fit.
# On the training data:
table(checker.train[,"y"], fit3$fitted.values > 0.5)

# On new test data


table(checker.test[,"y"], 
      predict(fit3, newdata= checker.test, type="response") > 0.5)


########################
#
#  We could try the same with a linear discriminant function.
#
library(MASS)
LDAfit <- lda(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2
              + I(x1^3) + I(x2^3), data = checker.train)
ldaDF <- predict(LDAfit, xgrid)
# $posterior has one column per class; take the class "1" column so the
# contours show P(y = 1), matching the logistic plots.  (Passing the
# whole two-column matrix to matrix() would silently keep only the
# class "0" column.)
ldapHat <- matrix(ldaDF$posterior[, "1"], nrow = 21, ncol = 21, byrow = FALSE)
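# predict() on an lda fit returns a list: $class holds the predicted
# labels and $posterior the class probabilities (see help(predict.lda)).
str(ldaDF)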

quartz()
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0",
       col="red", cex=1.2)
points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+",
       col="blue", cex=1.2)
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        ldapHat,
        levels = 0.5,
        add = TRUE,
        lty = 1, col = "purple", lwd = 2)
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        ldapHat,
        nlevels = 4,
        add = TRUE,
        lty = 2, col = "purple", lwd = 2)


#
# We can easily get a classification (confusion) table for the fit.
# On the training data (posterior[, "1"] is the estimated probability of
# class 1, so > 0.5 matches the rule used for the logistic fits):
table(checker.train[,"y"], predict(LDAfit, checker.train)$posterior[, "1"] > 0.5)

# On new test data

table(checker.test[,"y"], predict(LDAfit, checker.test)$posterior[, "1"] > 0.5)

########################
#
#  Or with a quadratic discriminant function
#
QDAfit <- qda(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2
              + I(x1^3) + I(x2^3), data = checker.train)
qdaDF <- predict(QDAfit, xgrid)
# As with the LDA fit, take the class "1" posterior column.
qdapHat <- matrix(qdaDF$posterior[, "1"], nrow = 21, ncol = 21, byrow = FALSE)
#

quartz()
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0",
       col="red", cex=1.2)
points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+",
       col="blue", cex=1.2)


contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        qdapHat,
        levels = 0.5,
        add = TRUE,
        lty = 1, col = "black", lwd = 2)
contour(x = unique(xgrid[,1]), y = unique(xgrid[,2]),
        qdapHat,
        levels = c(1/11, 10/11),
        add = TRUE,
        lty = 2, col = "black", lwd = 2)
#
# We can easily get a classification (confusion) table for the fit.
# On the training data:
table(checker.train[,"y"], predict(QDAfit, checker.train)$posterior[, "1"] > 0.5)

# On new test data

table(checker.test[,"y"], predict(QDAfit, checker.test)$posterior[, "1"] > 0.5)
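
######################
#
# To wrap up, one way to line up all five fits is their test
# misclassification rates (a sketch, using the misclass.rate helper
# defined earlier for the logistic fits):
c(logistic1 = misclass.rate(fit1, checker.test),
  logistic2 = misclass.rate(fit2, checker.test),
  logistic3 = misclass.rate(fit3, checker.test),
  lda = mean((predict(LDAfit, checker.test)$posterior[, "1"] > 0.5)
             != checker.test[, "y"]),
  qda = mean((predict(QDAfit, checker.test)$posterior[, "1"] > 0.5)
             != checker.test[, "y"]))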