# This file contains some dimension reduction methods illustrated on # the pixels data # # Authors: # R.W. Oldford, 2004 # # # just a handy function to capture the pathname for the files we will load. web441 <- function(x) {paste('http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/', x, sep='') } # (in class we just loaded this file since the # machine wasn't connected to the internet). # # Get the data: source(web441('pixels.train')) # quartz() is the Mac OS call for a new window. # Change the call inside from quartz() to whatever the call is # for the OS you are using. e.g. X11() or pdf(), etc. newwindow <- function(x) {quartz()} # # plot the data xvars <- 1:19 pixelsVars <- colnames(pixels.tr)[xvars] newwindow() pairs(pixels.tr[,xvars], main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]]) # # Find the directions for the principal components # via the singular value decomposition of the X matrix # # First centre and scale the data pixels.x <- pixels.tr[,xvars] pixels.xbar <- apply(pixels.x,2,mean) pixels.sd <- apply(pixels.x,2,sd) pixels.sd # Note that the variate REGION.PIXEL.COUNT has zero sd. # Check out the five number summary of each variate apply(pixels.tr[,xvars],2,fivenum) # Looks like this (the third) variate is identical for all pixels. # So let's delete it. xvars <- xvars[-3] pixelsVars <- colnames(pixels.tr)[xvars] # # And start again # pixels.x <- pixels.tr[,xvars] pixels.xbar <- apply(pixels.x,2,mean) pixels.sd <- apply(pixels.x,2,sd) pixels.x01 <- (pixels.x - matrix(pixels.xbar, nrow(pixels.x), ncol(pixels.x), byrow = TRUE) ) / matrix(pixels.sd, nrow(pixels.x), ncol(pixels.x), byrow = TRUE) # # Now let's look for the principal directions # pixels.svd <- svd (pixels.x01) u <- pixels.svd$u d <- pixels.svd$d v <- pixels.svd$v X <- pixels.x01 # # Note that the difference is zero (subject to floating point arithmetic) # sum(abs(X - u %*% diag(d) %*% t(v))) # # Note that the values of d are the square roots of the eigen values # of t(X) %*% X and so are proportional to variances along the # principal axes. d2 <- d^2 newwindow() par(mfcol=c(1,2)) plot(0,0,xlim=c(1,length(d)), ylim = c(0,1), main = "Standardized singular values", xlab = "i", ylab = "sing. val", type ="n") points(1:length(d), d / max(d)) lines(1:length(d), d/max(d), lty = 2) plot(0,0,xlim=c(1,length(d)), ylim = c(0,1), main = "Proportion of total variance", xlab = "i", ylab = "Cumulative proportion", type ="n") points(1:length(d),cumsum(d2/sum(d2))) lines(1:length(d), cumsum(d2/sum(d2)), lty = 2) # # Might get away with maybe 10 of the principal components. # # # The data in the new coordinate system are # xpc <- u %*% diag(d) colnames(xpc) <- paste("PC", 1:ncol(xpc),sep="_") newwindow() par(mfcol=c(1,1)) pairs(xpc, main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]]) # # Let's look at the contents of the vectors that correspond to the first # two principal components v1 <- v[,1] v2 <- v[,2] # # We can associate the weights with the variates through the # following formatting (which is less complicated than it looks) cat("First principal component: ", paste(prettyNum(v1[1:(length(v1)-1)], digits = 2), pixelsVars[1:(length(v1)-1)], "+"), paste(prettyNum(v1[length(v1)], digits = 2), pixelsVars[length(v1)]) ) # cat("Second principal component: ", paste(prettyNum(v2[1:(length(v2)-1)], digits = 2), pixelsVars[1:(length(v2)-1)], "+"), paste(prettyNum(v2[length(v2)], digits = 2), pixelsVars[length(v2)]) ) # ##################################### # # Try fitting a multinomial model using only the first 10 pcs # # # Fit a multinomial model # library(MASS) library(nnet) pixels.pc <- data.frame(xpc, pixelType = pixelType.tr) fitpc <- multinom(formula = pixelType ~ PC_1 + PC_2 + PC_3 + PC_4 + PC_5 + PC_6 + PC_7 + PC_8 + PC_9 + PC_10, data = pixels.pc, maxit = 1000 ) phat <- predict(fitpc, pixels.pc[,1:length(pixelsVars)], type="probs") types <- colnames(phat) pred.tr <- apply(phat,1, function(x){types[x==max(x)] } ) table(pixelType.tr, pred.tr) # # On the test data # source(web441('pixels.test')) pixels.xte <- pixels.te[,xvars] pixels.x01te <- (pixels.xte - matrix(pixels.xbar, nrow(pixels.xte), ncol(pixels.xte), byrow = TRUE) ) / matrix(pixels.sd, nrow(pixels.xte), ncol(pixels.xte), byrow = TRUE) xpc.te <- as.matrix(pixels.x01te) %*% v colnames(xpc.te) <- colnames(xpc) phat <- predict(fitpc, xpc.te[,1:length(pixelsVars)], type="probs") pred.te <- apply(phat,1, function(x){types[x==max(x)] } ) table(pixelType.te, pred.te) ################################################################ # # Try to get the crimcoords # ################################################################ # # Get the separate groups (type of pixel) type1.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[1],pixelsVars] type2.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[2],pixelsVars] type3.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[3],pixelsVars] type4.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[4],pixelsVars] type5.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[5],pixelsVars] type6.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[6],pixelsVars] type7.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[7],pixelsVars] # # Summary statistics for each group n1 <- nrow(type1.tr) type1.xbar <- apply(type1.tr,2,mean) t1.cov <- cov(type1.tr) n2 <- nrow(type1.tr) type2.xbar <- apply(type2.tr,2,mean) t2.cov <- cov(type2.tr) n3 <- nrow(type1.tr) type3.xbar <- apply(type3.tr,2,mean) t3.cov <- cov(type3.tr) n4 <- nrow(type1.tr) type4.xbar <- apply(type4.tr,2,mean) t4.cov <- cov(type4.tr) n5 <- nrow(type1.tr) type5.xbar <- apply(type5.tr,2,mean) t5.cov <- cov(type5.tr) n6 <- nrow(type1.tr) type6.xbar <- apply(type6.tr,2,mean) t6.cov <- cov(type6.tr) n7 <- nrow(type1.tr) type7.xbar <- apply(type7.tr,2,mean) t7.cov <- cov(type7.tr) # # Get the within covariance matrix # W <- (n1 -1 ) * t1.cov + (n2 -1 ) * t2.cov + (n3 -1 ) * t3.cov + (n4 -1 ) * t4.cov + (n5 -1 ) * t5.cov + (n6 -1 ) * t6.cov + (n7 -1 ) * t7.cov W <- W /(n1 + n2 + n3 + n4 + n5 + n6 + n7 - 7) # # Get the between B <- rbind((type1.xbar - pixels.xbar), (type2.xbar - pixels.xbar), (type3.xbar - pixels.xbar), (type4.xbar - pixels.xbar), (type5.xbar - pixels.xbar), (type6.xbar - pixels.xbar), (type7.xbar - pixels.xbar) ) # B is not found as t(B) %*% diag(c(n1,n2,n3,n4,n5,n6,n7)) B # The following saves a little computation (though since all n= 30 here # there is no real need here to weight the sum by its sample sizes.) B <- t(B) %*% (B * c(n1,n2,n3,n4,n5,n6,n7)) B <- B /(length(types) - 1) # # We want the eigen vectors of W^{-1}B to give the crimcoords # W_inv_B <- solve(W,B) # # which fails because W_inv is near singular. # # So instead, we find the Cholesky decomposition W = t(C) C and C is an # upper triangular matrix C <- chol(W) # # check sum(abs( W - (t(C) %*% C))) # # Do the same for the Between D <- solve(t(C),B) E <- solve(t(C),t(D)) eigen <- eigen(E) # # no. positive real eigen values is r <- min((length(types) - 1), length(pixelsVars)) eigvals <- eigen$values[1:r] # get rid of zero imaginary part eigvals <- as.real(eigvals) # # Have a look newwindow() par(mfcol=c(1,2)) plot(0,0,xlim=c(1,length(eigvals)), ylim = c(0,1), main = "Standardized eigen values", xlab = "i", ylab = "sing. val", type ="n") points(1:length(eigvals), eigvals / max(eigvals)) lines(1:length(eigvals), eigvals/max(eigvals), lty = 2) d2 <- eigvals plot(0,0,xlim=c(1,length(d2)), ylim = c(0,1), main = "Proportion of total F", xlab = "i", ylab = "Cumulative proportion", type ="n") points(1:length(d2),cumsum(d2/sum(d2))) lines(1:length(d2), cumsum(d2/sum(d2)), lty = 2) # # Crim directions taken first from # eigen-vectors eigvecs <- eigen$vectors[,1:r] # # Again get rid of any complex part (take care to preserve dimensions) # eigvecs <- matrix(as.real(eigvecs),nrow(eigvecs),ncol(eigvecs)) crimdirs <- matrix(as.real(solve(C,eigvecs)),nrow(eigvecs),ncol(eigvecs)) crimcoords <- as.matrix(pixels.tr[,pixelsVars]) %*% crimdirs colnames(crimcoords)<- paste("CC",1:r,sep="_") newwindow() par(mfcol=c(1,1)) pairs(crimcoords, main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]]) # # Compare to the same number of PC directions # newwindow() par(mfcol=c(1,1)) pairs(xpc[,1:r], main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]])