# This file demonstrates linear discriminant function # theory. # No data is involved, just the densities. # Only two densities, common covariance matrix # # Authors: # H.A. Chipman, 2003 # R.W. Oldford, 2004 # # # The following sets up the grid of points on which the densities will # be evaluated. x1.grid <- rep(seq(0,8,l=40),48) x2.grid <- rep(seq(-5,18,l=48),rep(40,48)) x.grid <- cbind(x1.grid,x2.grid) # Here are the grid points plot(x.grid) # Note that the aspect ratio (vertical:horizontal ratio) and the scaling (one # unit up for every two across") means that angles in the original scale will # not display correctly in the picture. # For example, the lines # y = x and y = -x are orthogonal to one another in the plane # but do not appear so in the plot. # Add the lines to show this. abline(coef=c(0,1)) # intercept 0, slope 1 ... coef = c(intercept, slope) abline(coef=c(0,-1)) # intercept 0, slope -1 # You will need to resize the window so that the aspect ratio # has the visual scalling match on both axes. Try it. # just a handy function to capture the pathname for the files we will load. web441 <- function(x) {paste('http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/', x, sep='') } # get code for creating densities # (in class we just loaded this file since the # machine wasn't connected to the internet). source(web441('dmvnorm.R')) # First plot two multivariate normals as contour plots of the # density # Here are the parameter values; notice the common covariance matrix. mu0 <- c(3,3) mu1 <- c(6,8) Sigma <- cbind(c(1,2),c(2,20)) # Create the class 0 density den0 <- dmvnorm(x.grid,mu0,Sigma) # Plot its contours (6 of them) contour(unique(x1.grid),unique(x2.grid),matrix(den0,40,48),nlevels=6,col='red') # identify the mean as 0 points(mu0[1],mu0[2], pch="0") # Repeat for the class 1 density den1 <- dmvnorm(x.grid,mu1,Sigma) # The following ensures that the drawing occurs as if the existing plot # were `new'. Otherwise the existing plot would be wiped clean before # adding the rest. par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den1,40,48),nlevels=6,col='blue') points(mu1[1],mu1[2], pch="1") # The line segment from mu0 to mu1 lines(rbind(mu0,mu1)) # Halfway point half <- (mu0 + mu1) /2 points(half[1],half[2],pch=19,col='black', cex=2) # (Aside: point character 19 is a solid circle, cex is the character expansion factor) # We can plot the discriminating function by just calculating the # ratio of normal densities ... or log ratio # We explicitly request a contour at height=0, # via the "levels=c(0)" option, which specifies a vector of contour levels par(new=T) contour(unique(x1.grid),unique(x2.grid),levels=c(0), labels=c(""), matrix(log(den0)-log(den1),40,48),nlevels=1,lwd=4,col='black') # now let's do a similar thing with three densities... # the third density will have the same Sigma # but different mu mu2 <- c(1,10) den2 <- dmvnorm(x.grid,mu2,Sigma) # draw all three contours contour(unique(x1.grid),unique(x2.grid),matrix(den0,40,48),nlevels=6,col='red') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den1,40,48),nlevels=6,col='blue') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den2,40,48),nlevels=6,col='green') # Could do the classifications pairwise # den0 versus den1 par(new=T) contour(unique(x1.grid),unique(x2.grid), matrix(log(den0)-log(den1),40,48),nlevels=1,lwd=2,col='purple',pch="") # den0 versus den2 par(new=T) contour(unique(x1.grid),unique(x2.grid), matrix(log(den0)-log(den2),40,48),nlevels=1,lwd=2,col='orange',pch="") # den1 versus den2 par(new=T) contour(unique(x1.grid),unique(x2.grid), matrix(log(den1)-log(den2),40,48),nlevels=1,lwd=2,col='steelblue',pch="") # Or each versus the other two by max likelihood # redraw all three contours each time # den1 versus den0 OR den2 contour(unique(x1.grid),unique(x2.grid),matrix(den0,40,48),nlevels=6,col='red') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den1,40,48),nlevels=6,col='blue') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den2,40,48),nlevels=6,col='green') par(new=T) contour(unique(x1.grid),unique(x2.grid), matrix(log(den1)-pmax(log(den2),log(den0)),40,48),nlevels=1,lwd=2,col='blue',pch="1") # den2 versus den0 OR den1 contour(unique(x1.grid),unique(x2.grid),matrix(den0,40,48),nlevels=6,col='red') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den1,40,48),nlevels=6,col='blue') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den2,40,48),nlevels=6,col='green') par(new=T) contour(unique(x1.grid),unique(x2.grid), matrix(log(den2)-pmax(log(den1),log(den0)),40,48),nlevels=1,lwd=2,col='green',pch="2") # den0 versus den1 OR den2 contour(unique(x1.grid),unique(x2.grid),matrix(den0,40,48),nlevels=6,col='red') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den1,40,48),nlevels=6,col='blue') par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den2,40,48),nlevels=6,col='green') par(new=T) contour(unique(x1.grid),unique(x2.grid), matrix(log(den0)-pmax(log(den2),log(den1)),40,48),nlevels=1,lwd=2,col='red',pch="0") # This is a slightly better way of getting the class boundaries. At # each point a label for the most probable class, and then used to determine the # colour of each point in the grid. # # First redraw the contours (a little thicker this time) contour(unique(x1.grid),unique(x2.grid),matrix(den0,40,48),nlevels=6,col='red',lwd=2) par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den1,40,48),nlevels=6,col='blue',lwd=2) par(new=T) contour(unique(x1.grid),unique(x2.grid),matrix(den2,40,48),nlevels=6,col='green',lwd=2) # Now find the best. The value stored in best will be a unique, but arbitrary, # label for each density. The numbers chosen here are 2 for den0, 4 for den1 # and 3 for den2. When used as the value of the col argument in points, 2,4, and 3 # stand for the colours red, blue and green, respectively (which will match the colours # of the contour plots). (Why these numbers? white=0, black=1, red=2, green=3, blue=4. # Hint: think flatbed 4-colour pen plotters.) best <- apply(cbind(den0,den1,den2),1,function(x){c(2,4,3)[x==max(x)]}) points(x1.grid,x2.grid,col=best) # The border (based on these grid points) can be determined by the contour function # by giving it g-1 levels where g is the number of groups (here 3). par(new=T) contour(unique(x1.grid),unique(x2.grid), matrix(best,40,48),nlevels=2, lwd=1,col='black')