#  This file  contains a logistic regression analysis of 
#  the Checker data applied to the test data
#  Authors:
#       R.W. Oldford, 2004
#
#


# just a handy function to capture the pathname for the files we will load.
web441 <- function(x)
         {paste('http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/',
                 x,
                 sep='')
         }
# get code for creating densities  
# (in class we just loaded this file since the 
#  machine wasn't connected to the internet).
#
# Get the data:
source(web441('checker.R'))

# plot it
x1.range <- range(checker.train[,"x1"])
x2.range <- range(checker.train[,"x2"])

xgrid <- expand.grid(x1=seq(x1.range[1], x1.range[2], length = 21),
                     x2=seq(x2.range[1], x2.range[2], length = 21))

# plot it on the test data.
#  Linear logistic
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.test[checker.test[,"y"] == 0, c("x1", "x2")], pch="0",
        col="red", cex=1.2)
points(checker.test[checker.test[,"y"] == 1, c("x1", "x2")], pch="+",
        col="blue", cex=1.2)


# Fit the simple logistic model
fit1 <- glm(y ~ x1 + x2, family ="binomial", data = checker.train)
pHat <- predict(fit1, newdata = xgrid, type="response")
# Put the predicted probabilities in the grid to be used by the contour
#  program

pHat <- matrix(pHat, nrow = 21, ncol = 21, byrow = FALSE)

contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]),
        pHat, 
        nlevels = 4, 
        add=T, 
        lty=2, col="brown", lwd=2)

table(checker.test[,"y"], 
      predict(fit1, newdata= checker.test, type="response") > 0.5)


# Second order logistic

#
# Fit the logistic model with a second order (still linear) predictor.
fit2 <- glm(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1*x2, 
           family ="binomial", data = checker.train)

pHat2 <- predict(fit2, newdata = xgrid, type="response")

# Put the predicted probabilities in the grid to be used by the contour
#  program

pHat2 <- matrix(pHat2, nrow = 21, ncol = 21, byrow = FALSE)


quartz()
plot(0,0,xlim=x1.range, ylim = x2.range,
     xlab = "x1", ylab = "x2", type ="n")
points(checker.test[checker.test[,"y"] == 0, c("x1", "x2")], pch="0",
       col="red", cex=1.2)
points(checker.test[checker.test[,"y"] == 1, c("x1", "x2")], pch="+",
       col="blue", cex=1.2)

contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]),
        pHat2, 
        levels = 0.5, 
        add=T, 
        lty=1, col="steelblue", lwd=2)
contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]),
        pHat2, 
        levels =c(1/11, 10/11), 
        add=T, 
        lty=2, col="steelblue", lwd=2)

table(checker.test[,"y"], 
      predict(fit2, newdata= checker.test, type="response") > 0.5)