# This file contains a logistic regression analysis of # the Checker data from Will Welch's notes Chapt. 1 # # Authors: # R.W. Oldford, 2004 # # # just a handy function to capture the pathname for the files we will load. web441 <- function(x) {paste('http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/', x, sep='') } # get code for creating densities # (in class we just loaded this file since the # machine wasn't connected to the internet). # # Get the data: source(web441('checker.R')) # plot it x1.range <- range(checker.train[,"x1"]) x2.range <- range(checker.train[,"x2"]) plot(0,0,xlim=x1.range, ylim = x2.range, xlab = "x1", ylab = "x2", type ="n") points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0", col="red", cex=1.2) points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+", col="blue", cex=1.2) # Fit the generalized additive (smooth) logistic model library(mgcv) fit1 <- gam(y ~ s(x1) + s(x2), family ="binomial", data = checker.train) # Although the algorithm failed to converge, we can still look at the results: summary(fit1) # Note that this table gives some idea of how well this (unconverged) model fits # the data. As with generalized linear models, there is a deviance (chi-square) # test associated with each term in the model and a percent of the deviance explained. # Note also the fractional degrees of freedom associated with some of the terms. # We can look at each term (since the model is additive) to see that term's fitted # values as a function of the data. # There are two terms in this model so there will be two functions to view. # First set up to view both plota in the one window par(mfrow=c(2,1)) # Display the two fits plus the standard errors (pointwise) plot(fit1, se=T) # Note that the algorithm failed to converge. We could increase the number of # iterations. Increasing them to 50 as below (from the default 20) still # does not have the algorithm converge. # The tolerance shown (epsilon) is the default (1 x 10^-4). fit1a <- gam(y ~ s(x1) + s(x2), family ="binomial", data = checker.train, control = gam.control(epsilon=1e-04, maxit= 50, globit= 50)) # Even this fails to converge and though the results might be nearly as before although a # larger/smaller percentage of the deviance might be explained by this fit. summary(fit1a) quartz() par(mfrow=c(2,1)) plot(fit1a, se = T) # Here are the predicted probabilities of belonging to class 1 (y=1, so # the second of the two classes). predict(fit1, newdata = xnew, type = "response") # depending on our rule, "Bob" and "Mary" will be classified by the size # of these estimated probabilities. # To see the regions chosen by this, we plot contours of the predicted # probability over a grid of points. # Compute the fitted probabilities over a grid so that we could plot them. xgrid <- expand.grid(x1=seq(x1.range[1], x1.range[2], length = 21), x2=seq(x2.range[1], x2.range[2], length = 21)) pHat <- predict(fit1, newdata = xgrid, type="response") # Put the predicted probabilities in the grid to be used by the contour # program pHat <- matrix(pHat, nrow = 21, ncol = 21, byrow = FALSE) # Put contours on corresponding to odds of 10:1, 1:1, and 1:10 # Make the line type for 1:1 be different # (NB none of these contours appear in the range of the data! # Considering the summary of the fit, this is not too surprising.) # First set up to view both plota in the one window quartz() par(mfrow=c(1,1)) plot(0,0,xlim=x1.range, ylim = x2.range, xlab = "x1", ylab = "x2", type ="n") points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0", col="red", cex=1.2) points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+", col="blue", cex=1.2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat, levels = 0.5, add=T, lty=1, col="brown", lwd=2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat, levels =c(1/11, 10/11), add=T, lty=2, col="brown", lwd=2) # Let's look at 4 contours that do appear in the range of the data. contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat, nlevels = 4, add=T, lty=2, col="green", lwd=2) # # Get a look at the misclassification probabilities on the training data # table(checker.train[,"y"], predict(fit1, newdata= checker.train, type="response") > 0.5) # # Get a look at the misclassification probabilities on the test data # table(checker.test[,"y"], predict(fit1, newdata= checker.test, type="response") > 0.5) # ###################### # # Fit the logistic generalized model with interaction (still additive) predictor. fit2 <- gam(y ~ s (x1) + s(x2) + s (x1 * x2), family ="binomial", data = checker.train, control = gam.control(epsilon=1e-04,maxit= 100, globit= 100)) # # but that failed to converge # # Even so, look at results. Note significance levels associated with the coefficients. summary(fit2) # Note that because of the smoothed interaction term, the plot function will not # work here. (It looks like it should for some combination of parameters, # but I did not find a combination which did not produce an error; for example # plot(fit2, se = FALSE, pers=TRUE, rug=FALSE) # plot(fit2, se = FALSE, select=1) # each produced the same error # Error in "[.data.frame"(x$model, x$smooth[[i]]$term) : # undefined columns selected # ) # Instead, we will look at the regions as chosen (predicted) by the fit. # To see this, we plot contours of the predicted # probability over a grid of points pHat2 <- predict(fit2, newdata = xgrid, type="response") # Put the predicted probabilities in the grid to be used by the contour # program pHat2 <- matrix(pHat2, nrow = 21, ncol = 21, byrow = FALSE) # Note that the following puts up a new window in the Mac R environment # You may need to do something different for your environment (e.g. X11()) # See help("devices") or dev.cur() for more info. quartz() plot(0,0,xlim=x1.range, ylim = x2.range, xlab = "x1", ylab = "x2", type ="n") points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0", col="red", cex=1.2) points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+", col="blue", cex=1.2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat2, levels = 0.5, add=T, lty=1, col="brown", lwd=2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat2, levels = c(1/11, 10/11), add=T, lty=2, col="black", lwd=2) # # Get a look at the misclassification probabilities on the training data # table(checker.train[,"y"], predict(fit2, newdata= checker.train, type="response") > 0.5) # # Get a look at the misclassification probabilities on the test data # table(checker.test[,"y"], predict(fit2, newdata= checker.test, type="response") > 0.5) # ###################### # # Fit the logistic generalized model with interaction (still additive) predictor, but # dropping the s(x1) term since this did not seem significant, recall: summary(fit2) fit3 <- gam(y ~ s(x2) + s (x1*x2), family ="binomial", data = checker.train, control = gam.control(epsilon=1e-04,maxit= 100, globit= 100)) # # which at least converges # # So, look at results. Note significance levels associated with the coefficients # and the proportion of deviance explained. summary(fit3) # Now look at the regions as chosen (predicted) by the fit. pHat3 <- predict(fit3, newdata = xgrid, type="response") # Put the predicted probabilities in the grid to be used by the contour # program pHat3 <- matrix(pHat3, nrow = 21, ncol = 21, byrow = FALSE) # Note that the following puts up a new window in the Mac R environment # You may need to do something different for your environment (e.g. X11()) # See help("devices") or dev.cur() for more info. quartz() plot(0,0,xlim=x1.range, ylim = x2.range, xlab = "x1", ylab = "x2", type ="n") points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0", col="red", cex=1.2) points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+", col="blue", cex=1.2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat3, levels = 0.5, add=T, lty=1, col="brown", lwd=2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat3, levels = c(1/11, 10/11), add=T, lty=2, col="black", lwd=2) # # Get a look at the misclassification probabilities on the training data # table(checker.train[,"y"], predict(fit3, newdata= checker.train, type="response") > 0.5) # # Get a look at the misclassification probabilities on the test data # table(checker.test[,"y"], predict(fit3, newdata= checker.test, type="response") > 0.5) # ###################### # # The same reasoning suggests # dropping the s(x1 * x2) term since this did not seem significant, recall: summary(fit3) fit4 <- gam(y ~ s(x2) , family ="binomial", data = checker.train, control = gam.control(epsilon=1e-04,maxit= 100, globit= 100)) # # which again converges # # Again, look at results. Note significance levels associated with the coefficients # and the proportion of deviance explained. summary(fit4) # Now look at the regions as chosen (predicted) by the fit. pHat4 <- predict(fit4, newdata = xgrid, type="response") # Put the predicted probabilities in the grid to be used by the contour # program pHat4 <- matrix(pHat4, nrow = 21, ncol = 21, byrow = FALSE) # Note that the following puts up a new window in the Mac R environment # You may need to do something different for your environment (e.g. X11()) # See help("devices") or dev.cur() for more info. quartz() plot(0,0,xlim=x1.range, ylim = x2.range, xlab = "x1", ylab = "x2", type ="n") points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0", col="red", cex=1.2) points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+", col="blue", cex=1.2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat4, levels = 0.5, add=T, lty=1, col="brown", lwd=2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat4, levels = c(1/11, 10/11), add=T, lty=2, col="black", lwd=2) # None of which appear within the region of the data, so add contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat4, nlevels = 4, add=T, lty=2, col="black", lwd=2) # # Which looks a lot like the logistic without any smooth! # # Get a look at the misclassification probabilities on the training data # table(checker.train[,"y"], predict(fit4, newdata= checker.train, type="response") > 0.5) # # Get a look at the misclassification probabilities on the test data # table(checker.test[,"y"], predict(fit4, newdata= checker.test, type="response") > 0.5) # ###################### # # Instead of dropping the s(x1 * x2) term, try dropping the x2 term. fit5 <- gam(y ~ s (x1 * x2), family ="binomial", data = checker.train) # # which again converges # # Again, look at results. Note significance levels associated with the coefficients # and the proportion of deviance explained. summary(fit5) # Now look at the regions as chosen (predicted) by the fit. pHat5 <- predict(fit5, newdata = xgrid, type="response") # Put the predicted probabilities in the grid to be used by the contour # program pHat5 <- matrix(pHat5, nrow = 21, ncol = 21, byrow = FALSE) # Note that the following puts up a new window in the Mac R environment # You may need to do something different for your environment (e.g. X11()) # See help("devices") or dev.cur() for more info. quartz() plot(0,0,xlim=x1.range, ylim = x2.range, xlab = "x1", ylab = "x2", type ="n") points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0", col="red", cex=1.2) points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+", col="blue", cex=1.2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat5, levels = 0.5, add=T, lty=1, col="brown", lwd=2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat5, levels = c(1/11, 10/11), add=T, lty=2, col="black", lwd=2) # Also add contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat5, levels = c(0.3, 0.45, 0.55, 0.7), add=T, lty=2, col="green", lwd=2) # # Which looks a lot like the logistic without any smooth! # # Get a look at the misclassification probabilities on the training data # table(checker.train[,"y"], predict(fit5, newdata= checker.train, type="response") > 0.5) # # Get a look at the misclassification probabilities on the test data # table(checker.test[,"y"], predict(fit5, newdata= checker.test, type="response") > 0.5) # ###################### # # Fit the logistic model with interaction (still additive) predictor. fit6 <- gam(y ~ s(x1,x2), family ="binomial", data = checker.train) # Look at results. Note significance levels associated with the coefficients. summary(fit6) # To see the regions chosen by this, we plot contours of the predicted # probability over a grid of points pHat6 <- predict(fit6, newdata = xgrid, type="response") # Put the predicted probabilities in the grid to be used by the contour # program pHat6 <- matrix(pHat6, nrow = 21, ncol = 21, byrow = FALSE) # Note that the following puts up a new window in the Mac R environment # You may need to do something different for your environment (e.g. X11()) # See help("devices") or dev.cur() for more info. quartz() plot(0,0,xlim=x1.range, ylim = x2.range, xlab = "x1", ylab = "x2", type ="n") points(checker.train[checker.train[,"y"] == 0, c("x1", "x2")], pch="0", col="red", cex=1.2) points(checker.train[checker.train[,"y"] == 1, c("x1", "x2")], pch="+", col="blue", cex=1.2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat6, levels = c(0.5), add=T, lty=1, col="black", lwd=2) contour(x=unique(xgrid[,1]), y=unique(xgrid[,2]), pHat6, levels = c(1/11, 10/11), add=T, lty=2, col="black", lwd=2) # # Get a look at the misclassification probabilities on the training data # table(checker.train[,"y"], predict(fit6, newdata= checker.train, type="response") > 0.5) # # Get a look at the misclassification probabilities on the test data # table(checker.test[,"y"], predict(fit6, newdata= checker.test, type="response") > 0.5)