#  This file contains some dimension reduction methods illustrated on 
#  the pixels data
#      
#  Authors:
#       R.W. Oldford, 2004
#

#
# just a handy function to capture the pathname for the files we will load.
web441 <- function(x)
         {paste('http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/',
                 x,
                 sep='')
         }

# (in class we just loaded this file since the 
#  machine wasn't connected to the internet).
#
# Get the data:
source(web441('pixels.train'))

# quartz() is the Mac OS call for a new window.
# Change the call inside from quartz() to whatever the call is
# for the OS you are using. e.g. X11() or pdf(), etc.

newwindow <- function(x) {quartz()}

#

# plot the data
xvars <- 1:19
pixelsVars <- colnames(pixels.tr)[xvars]
newwindow()
pairs(pixels.tr[,xvars], main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])


#
# Find the directions for the principal components
# via the singular value decomposition of the X matrix
#
# First centre and scale the data
pixels.x <- pixels.tr[,xvars]
pixels.xbar <- apply(pixels.x,2,mean)
pixels.sd <- apply(pixels.x,2,sd)
pixels.sd
# Note that the variate REGION.PIXEL.COUNT has zero sd.
# Check out the five number summary of each variate
apply(pixels.tr[,xvars],2,fivenum)

# Looks like this (the third) variate is identical for all pixels.
# So let's delete it.
xvars <- xvars[-3]
pixelsVars <- colnames(pixels.tr)[xvars]
#
# And start again
#

pixels.x <- pixels.tr[,xvars]
pixels.xbar <- apply(pixels.x,2,mean)
pixels.sd <- apply(pixels.x,2,sd)
pixels.x01 <- (pixels.x - 
                matrix(pixels.xbar,
                       nrow(pixels.x), 
                       ncol(pixels.x), 
                       byrow = TRUE)  ) /
                                matrix(pixels.sd,
                                       nrow(pixels.x), 
                                       ncol(pixels.x), 
                                       byrow = TRUE)


#
# Now let's look for the principal directions
#
pixels.svd <- svd (pixels.x01)

u <- pixels.svd$u
d <- pixels.svd$d
v <- pixels.svd$v

X <- pixels.x01
#
# Note that the difference is zero (subject to floating point arithmetic)
#

sum(abs(X  - u %*% diag(d) %*% t(v)))

#
#  Note that the values of d are the square roots of the eigen values
#  of t(X) %*% X  and so are proportional to variances along the
#  principal axes.
d2 <- d^2
newwindow()
par(mfcol=c(1,2))
plot(0,0,xlim=c(1,length(d)), ylim = c(0,1),
     main = "Standardized singular values", 
     xlab = "i", ylab = "sing. val", type ="n")
points(1:length(d), d / max(d))
lines(1:length(d), d/max(d), lty = 2)
plot(0,0,xlim=c(1,length(d)), ylim = c(0,1),
     main = "Proportion of total variance", 
    xlab = "i", ylab = "Cumulative proportion", type ="n")
points(1:length(d),cumsum(d2/sum(d2)))
lines(1:length(d), cumsum(d2/sum(d2)), lty = 2)

#
#  Might get away with  maybe 10 of the principal components.
#
#
#  The data in the new coordinate system are
#
xpc <- u %*% diag(d)
colnames(xpc) <- paste("PC", 1:ncol(xpc),sep="_")
newwindow()
par(mfcol=c(1,1))
pairs(xpc, main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])


#
#  Let's look at the contents of the vectors that correspond to the first
#  two principal components

v1 <- v[,1]
v2 <- v[,2]

#
#  We can associate the weights with the variates through the
#  following formatting (which is less complicated than it looks)

cat("First principal component: ",
    paste(prettyNum(v1[1:(length(v1)-1)], digits = 2), 
          pixelsVars[1:(length(v1)-1)], 
          "+"), 
    paste(prettyNum(v1[length(v1)], digits = 2), 
          pixelsVars[length(v1)])
    )

# 

cat("Second principal component: ",
    paste(prettyNum(v2[1:(length(v2)-1)], digits = 2), 
          pixelsVars[1:(length(v2)-1)], 
          "+"), 
    paste(prettyNum(v2[length(v2)], digits = 2), 
          pixelsVars[length(v2)])
    )

# 

#####################################
#
#  Try fitting a multinomial model using only the first 10 pcs
#
#
#  Fit a multinomial model
#

library(MASS)
library(nnet)
pixels.pc <- data.frame(xpc, pixelType = pixelType.tr)

fitpc <- multinom(formula = pixelType ~ PC_1 + PC_2 + PC_3 + 
                                       PC_4 + PC_5 + PC_6 + 
                                       PC_7 + PC_8 + PC_9 + 
                                       PC_10, 
                          data = pixels.pc, 
                          maxit = 1000
                         )


phat <- predict(fitpc, pixels.pc[,1:length(pixelsVars)], type="probs")
types <- colnames(phat)
pred.tr <- apply(phat,1,
                     function(x){types[x==max(x)]
                                }
                       )


table(pixelType.tr, pred.tr)

#
#  On the test data
# 

source(web441('pixels.test'))
pixels.xte <- pixels.te[,xvars]
pixels.x01te <- (pixels.xte - 
                matrix(pixels.xbar,
                       nrow(pixels.xte), 
                       ncol(pixels.xte), 
                       byrow = TRUE)  ) /
                                matrix(pixels.sd,
                                       nrow(pixels.xte), 
                                       ncol(pixels.xte), 
                                       byrow = TRUE)


xpc.te <- as.matrix(pixels.x01te) %*% v
colnames(xpc.te) <- colnames(xpc)


phat <- predict(fitpc, xpc.te[,1:length(pixelsVars)], type="probs")
pred.te <- apply(phat,1,
                     function(x){types[x==max(x)]
                                }
                       )
table(pixelType.te, pred.te)

################################################################
#
#  Try to get the crimcoords
#
################################################################

#
# Get the separate groups (type of pixel)

type1.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[1],pixelsVars]
type2.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[2],pixelsVars]
type3.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[3],pixelsVars]
type4.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[4],pixelsVars]
type5.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[5],pixelsVars]
type6.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[6],pixelsVars]
type7.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[7],pixelsVars]

#
# Summary statistics for each group

n1 <- nrow(type1.tr)
type1.xbar <- apply(type1.tr,2,mean)
t1.cov <- cov(type1.tr)

n2 <- nrow(type1.tr)
type2.xbar <- apply(type2.tr,2,mean)
t2.cov <- cov(type2.tr)

n3 <- nrow(type1.tr)
type3.xbar <- apply(type3.tr,2,mean)
t3.cov <- cov(type3.tr)

n4 <- nrow(type1.tr)
type4.xbar <- apply(type4.tr,2,mean)
t4.cov <- cov(type4.tr)

n5 <- nrow(type1.tr)
type5.xbar <- apply(type5.tr,2,mean)
t5.cov <- cov(type5.tr)

n6 <- nrow(type1.tr)
type6.xbar <- apply(type6.tr,2,mean)
t6.cov <- cov(type6.tr)

n7 <- nrow(type1.tr)
type7.xbar <- apply(type7.tr,2,mean)
t7.cov <- cov(type7.tr)

#
#  Get the within covariance matrix
#

W <- (n1 -1 ) * t1.cov + (n2 -1 ) * t2.cov + (n3 -1 ) * t3.cov +
     (n4 -1 ) * t4.cov + (n5 -1 ) * t5.cov + (n6 -1 ) * t6.cov +
     (n7 -1 ) * t7.cov

W <- W /(n1 + n2 + n3 + n4 + n5 + n6 + n7 - 7)

#
# Get the between

B <- rbind((type1.xbar - pixels.xbar), (type2.xbar - pixels.xbar),
           (type3.xbar - pixels.xbar), (type4.xbar - pixels.xbar),
           (type5.xbar - pixels.xbar), (type6.xbar - pixels.xbar),
           (type7.xbar - pixels.xbar)
           )
#  B is not found as t(B) %*% diag(c(n1,n2,n3,n4,n5,n6,n7)) B
#  The following saves a little computation (though since all n= 30 here
#  there is no real need here to weight the sum by its sample sizes.)
B <- t(B) %*% (B * c(n1,n2,n3,n4,n5,n6,n7))

B <- B /(length(types) - 1)

#
#  We want the eigen vectors of W^{-1}B to give the crimcoords
#
W_inv_B <- solve(W,B)

#
#  which fails because W_inv is near singular.
#  
# So instead, we find the Cholesky decomposition W = t(C) C and C is an
# upper triangular matrix

C <- chol(W)
#
# check 
sum(abs( W - (t(C) %*% C)))
#
# Do the same for the Between
D <- solve(t(C),B)
E <- solve(t(C),t(D))

eigen <- eigen(E)
# 
# no. positive real eigen values is
r <- min((length(types) - 1), length(pixelsVars))
eigvals <- eigen$values[1:r]
#  get rid of zero imaginary part
eigvals <- as.real(eigvals)

#
# Have a look
newwindow()
par(mfcol=c(1,2))
plot(0,0,xlim=c(1,length(eigvals)), ylim = c(0,1),
     main = "Standardized eigen values", 
     xlab = "i", ylab = "sing. val", type ="n")
points(1:length(eigvals), eigvals / max(eigvals))
lines(1:length(eigvals), eigvals/max(eigvals), lty = 2)
d2 <- eigvals
plot(0,0,xlim=c(1,length(d2)), ylim = c(0,1),
     main = "Proportion of total F", 
    xlab = "i", ylab = "Cumulative proportion", type ="n")
points(1:length(d2),cumsum(d2/sum(d2)))
lines(1:length(d2), cumsum(d2/sum(d2)), lty = 2)

#
# Crim directions taken first from
# eigen-vectors

eigvecs <- eigen$vectors[,1:r]
#
# Again get rid of any complex part (take care to preserve dimensions)
#
eigvecs <- matrix(as.real(eigvecs),nrow(eigvecs),ncol(eigvecs))

crimdirs <- matrix(as.real(solve(C,eigvecs)),nrow(eigvecs),ncol(eigvecs))

crimcoords <- as.matrix(pixels.tr[,pixelsVars]) %*% crimdirs
colnames(crimcoords)<- paste("CC",1:r,sep="_")

newwindow()
par(mfcol=c(1,1))
pairs(crimcoords, main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])

#
#  Compare to the same number of PC directions
#

newwindow()
par(mfcol=c(1,1))
pairs(xpc[,1:r], main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])