;;; This file contains some dimension reduction methods illustrated on ;;; the pixels data ;;; ;;; Authors: ;;; R.W. Oldford, 2004 ;;; ;;; plot the data (<- xvars (seq 1 19)) (<- pixelsVars (rest (list-variates pixels.tr) )) (scat-mat :data pixels.tr :vars pixelsvars :title "Segmentation Data -- 7 pixel types" :link? T ) pairs(pixels.tr[,xvars], main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]]) ;;; ;;; Find the directions for the principal components ;;; via the singular value decomposition of the X matrix ;;; ;;; First centre and scale the data pixels.x <- pixels.tr[,xvars] pixels.xbar <- apply(pixels.x,2,mean) pixels.sd <- apply(pixels.x,2,sd) pixels.sd ;;; Note that the variate REGION.PIXEL.COUNT has zero sd. ;;; Check out the five number summary of each variate apply(pixels.tr[,xvars],2,fivenum) ;;; Looks like this (the third) variate is identical for all pixels. ;;; So let's delete it. xvars <- xvars[-3] pixelsVars <- colnames(pixels.tr)[xvars] ;;; ;;; And start again ;;; pixels.x <- pixels.tr[,xvars] pixels.xbar <- apply(pixels.x,2,mean) pixels.sd <- apply(pixels.x,2,sd) pixels.x01 <- (pixels.x - matrix(pixels.xbar, nrow(pixels.x), ncol(pixels.x), byrow = TRUE) ) / matrix(pixels.sd, nrow(pixels.x), ncol(pixels.x), byrow = TRUE) ;;; ;;; Now let's look for the principal directions ;;; pixels.svd <- svd (pixels.x01) u <- pixels.svd$u d <- pixels.svd$d v <- pixels.svd$v X <- pixels.x01 ;;; ;;; Note that the difference is zero (subject to floating point arithmetic) ;;; sum(abs(X - u %*% diag(d) %*% t(v))) ;;; ;;; Note that the values of d are the square roots of the eigen values ;;; of t(X) %*% X and so are proportional to variances along the ;;; principal axes. d2 <- d^2 newwindow() par(mfcol=c(1,2)) plot(0,0,xlim=c(1,length(d)), ylim = c(0,1), main = "Standardized singular values", xlab = "i", ylab = "sing. val", type ="n") points(1:length(d), d / max(d)) lines(1:length(d), d/max(d), lty = 2) plot(0,0,xlim=c(1,length(d)), ylim = c(0,1), main = "Proportion of total variance", xlab = "i", ylab = "Cumulative proportion", type ="n") points(1:length(d),cumsum(d2/sum(d2))) lines(1:length(d), cumsum(d2/sum(d2)), lty = 2) ;;; ;;; Might get away with maybe 10 of the principal components. ;;; ;;; ;;; The data in the new coordinate system are ;;; xpc <- u %*% diag(d) colnames(xpc) <- paste("PC", 1:ncol(xpc),sep="_") newwindow() par(mfcol=c(1,1)) pairs(xpc, main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]]) ;;; ;;; Let's look at the contents of the vectors that correspond to the first ;;; two principal components v1 <- v[,1] v2 <- v[,2] ;;; ;;; We can associate the weights with the variates through the ;;; following formatting (which is less complicated than it looks) cat("First principal component: ", paste(prettyNum(v1[1:(length(v1)-1)], digits = 2), pixelsVars[1:(length(v1)-1)], "+"), paste(prettyNum(v1[length(v1)], digits = 2), pixelsVars[length(v1)]) ) ;;; cat("Second principal component: ", paste(prettyNum(v2[1:(length(v2)-1)], digits = 2), pixelsVars[1:(length(v2)-1)], "+"), paste(prettyNum(v2[length(v2)], digits = 2), pixelsVars[length(v2)]) ) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; Try fitting a multinomial model using only the first 10 pcs ;;; ;;; ;;; Fit a multinomial model ;;; library(MASS) library(nnet) pixels.pc <- data.frame(xpc, pixelType = pixelType.tr) fitpc <- multinom(formula = pixelType ~ PC_1 + PC_2 + PC_3 + PC_4 + PC_5 + PC_6 + PC_7 + PC_8 + PC_9 + PC_10, data = pixels.pc, maxit = 1000 ) phat <- predict(fitpc, pixels.pc[,1:length(pixelsVars)], type="probs") types <- colnames(phat) pred.tr <- apply(phat,1, function(x){types[x==max(x)] } ) table(pixelType.tr, pred.tr) ;;; ;;; On the test data ;;; source(web441('pixels.test')) pixels.xte <- pixels.te[,xvars] pixels.x01te <- (pixels.xte - matrix(pixels.xbar, nrow(pixels.xte), ncol(pixels.xte), byrow = TRUE) ) / matrix(pixels.sd, nrow(pixels.xte), ncol(pixels.xte), byrow = TRUE) xpc.te <- as.matrix(pixels.x01te) %*% v colnames(xpc.te) <- colnames(xpc) phat <- predict(fitpc, xpc.te[,1:length(pixelsVars)], type="probs") pred.te <- apply(phat,1, function(x){types[x==max(x)] } ) table(pixelType.te, pred.te) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; Try to get the crimcoords ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; Get the separate groups (type of pixel) type1.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[1],pixelsVars] type2.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[2],pixelsVars] type3.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[3],pixelsVars] type4.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[4],pixelsVars] type5.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[5],pixelsVars] type6.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[6],pixelsVars] type7.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[7],pixelsVars] ;;; ;;; Summary statistics for each group n1 <- nrow(type1.tr) type1.xbar <- apply(type1.tr,2,mean) t1.cov <- cov(type1.tr) n2 <- nrow(type1.tr) type2.xbar <- apply(type2.tr,2,mean) t2.cov <- cov(type2.tr) n3 <- nrow(type1.tr) type3.xbar <- apply(type3.tr,2,mean) t3.cov <- cov(type3.tr) n4 <- nrow(type1.tr) type4.xbar <- apply(type4.tr,2,mean) t4.cov <- cov(type4.tr) n5 <- nrow(type1.tr) type5.xbar <- apply(type5.tr,2,mean) t5.cov <- cov(type5.tr) n6 <- nrow(type1.tr) type6.xbar <- apply(type6.tr,2,mean) t6.cov <- cov(type6.tr) n7 <- nrow(type1.tr) type7.xbar <- apply(type7.tr,2,mean) t7.cov <- cov(type7.tr) ;;; ;;; Get the within covariance matrix ;;; W <- (n1 -1 ) * t1.cov + (n2 -1 ) * t2.cov + (n3 -1 ) * t3.cov + (n4 -1 ) * t4.cov + (n5 -1 ) * t5.cov + (n6 -1 ) * t6.cov + (n7 -1 ) * t7.cov W <- W /(n1 + n2 + n3 + n4 + n5 + n6 + n7 - 7) ;;; ;;; Get the between B <- rbind((type1.xbar - pixels.xbar), (type2.xbar - pixels.xbar), (type3.xbar - pixels.xbar), (type4.xbar - pixels.xbar), (type5.xbar - pixels.xbar), (type6.xbar - pixels.xbar), (type7.xbar - pixels.xbar) ) ;;; B is not found as t(B) %*% diag(c(n1,n2,n3,n4,n5,n6,n7)) B ;;; The following saves a little computation (though since all n= 30 here ;;; there is no real need here to weight the sum by its sample sizes.) B <- t(B) %*% (B * c(n1,n2,n3,n4,n5,n6,n7)) B <- B /(length(types) - 1) ;;; ;;; We want the eigen vectors of W^{-1}B to give the crimcoords ;;; W_inv_B <- solve(W,B) ;;; ;;; which fails because W_inv is near singular. ;;; ;;; So instead, we find the Cholesky decomposition W = t(C) C and C is an ;;; upper triangular matrix C <- chol(W) ;;; ;;; check sum(abs( W - (t(C) %*% C))) ;;; ;;; Do the same for the Between D <- solve(t(C),B) E <- solve(t(C),t(D)) eigen <- eigen(E) ;;; ;;; no. positive real eigen values is r <- min((length(types) - 1), length(pixelsVars)) eigvals <- eigen$values[1:r] ;;; get rid of zero imaginary part eigvals <- as.real(eigvals) ;;; ;;; Have a look newwindow() par(mfcol=c(1,2)) plot(0,0,xlim=c(1,length(eigvals)), ylim = c(0,1), main = "Standardized eigen values", xlab = "i", ylab = "sing. val", type ="n") points(1:length(eigvals), eigvals / max(eigvals)) lines(1:length(eigvals), eigvals/max(eigvals), lty = 2) d2 <- eigvals plot(0,0,xlim=c(1,length(d2)), ylim = c(0,1), main = "Proportion of total F", xlab = "i", ylab = "Cumulative proportion", type ="n") points(1:length(d2),cumsum(d2/sum(d2))) lines(1:length(d2), cumsum(d2/sum(d2)), lty = 2) ;;; ;;; Crim directions taken first from ;;; eigen-vectors eigvecs <- eigen$vectors[,1:r] ;;; ;;; Again get rid of any complex part (take care to preserve dimensions) ;;; eigvecs <- matrix(as.real(eigvecs),nrow(eigvecs),ncol(eigvecs)) crimdirs <- matrix(as.real(solve(C,eigvecs)),nrow(eigvecs),ncol(eigvecs)) crimcoords <- as.matrix(pixels.tr[,pixelsVars]) %*% crimdirs colnames(crimcoords)<- paste("CC",1:r,sep="_") newwindow() par(mfcol=c(1,1)) pairs(crimcoords, main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]]) ;;; ;;; Compare to the same number of PC directions ;;; newwindow() par(mfcol=c(1,1)) pairs(xpc[,1:r], main = "Segmentation Data -- 7 pixel types", col = c("red", "blue", "darkgreen", "gray", "orange", "brown", "lightgreen")[pixels.tr[,"pixelType"]])