;;;  This file contains some dimension reduction methods illustrated on 
;;;  the pixels data
;;;      
;;;  Authors:
;;;       R.W. Oldford, 2004
;;;
;;; plot the data

(<- xvars  (seq 1 19))
(<- pixelsVars (rest  (list-variates pixels.tr) ))

(scat-mat  :data pixels.tr :vars pixelsvars :title "Segmentation Data -- 7 pixel types"  :link? T )

pairs(pixels.tr[,xvars], main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])


;;;
;;; Find the directions for the principal components
;;; via the singular value decomposition of the X matrix
;;;
;;; First centre and scale the data
pixels.x <- pixels.tr[,xvars]
pixels.xbar <- apply(pixels.x,2,mean)
pixels.sd <- apply(pixels.x,2,sd)
pixels.sd
;;; Note that the variate REGION.PIXEL.COUNT has zero sd.
;;; Check out the five number summary of each variate
apply(pixels.tr[,xvars],2,fivenum)

;;; Looks like this (the third) variate is identical for all pixels.
;;; So let's delete it.
xvars <- xvars[-3]
pixelsVars <- colnames(pixels.tr)[xvars]
;;;
;;; And start again
;;;

pixels.x <- pixels.tr[,xvars]
pixels.xbar <- apply(pixels.x,2,mean)
pixels.sd <- apply(pixels.x,2,sd)
pixels.x01 <- (pixels.x - 
                matrix(pixels.xbar,
                       nrow(pixels.x), 
                       ncol(pixels.x), 
                       byrow = TRUE)  ) /
                                matrix(pixels.sd,
                                       nrow(pixels.x), 
                                       ncol(pixels.x), 
                                       byrow = TRUE)


;;;
;;; Now let's look for the principal directions
;;;
pixels.svd <- svd (pixels.x01)

u <- pixels.svd$u
d <- pixels.svd$d
v <- pixels.svd$v

X <- pixels.x01
;;;
;;; Note that the difference is zero (subject to floating point arithmetic)
;;;

sum(abs(X  - u %*% diag(d) %*% t(v)))

;;;
;;;  Note that the values of d are the square roots of the eigen values
;;;  of t(X) %*% X  and so are proportional to variances along the
;;;  principal axes.
d2 <- d^2
newwindow()
par(mfcol=c(1,2))
plot(0,0,xlim=c(1,length(d)), ylim = c(0,1),
     main = "Standardized singular values", 
     xlab = "i", ylab = "sing. val", type ="n")
points(1:length(d), d / max(d))
lines(1:length(d), d/max(d), lty = 2)
plot(0,0,xlim=c(1,length(d)), ylim = c(0,1),
     main = "Proportion of total variance", 
    xlab = "i", ylab = "Cumulative proportion", type ="n")
points(1:length(d),cumsum(d2/sum(d2)))
lines(1:length(d), cumsum(d2/sum(d2)), lty = 2)

;;;
;;;  Might get away with  maybe 10 of the principal components.
;;;
;;;
;;;  The data in the new coordinate system are
;;;
xpc <- u %*% diag(d)
colnames(xpc) <- paste("PC", 1:ncol(xpc),sep="_")
newwindow()
par(mfcol=c(1,1))
pairs(xpc, main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])


;;;
;;;  Let's look at the contents of the vectors that correspond to the first
;;;  two principal components

v1 <- v[,1]
v2 <- v[,2]

;;;
;;;  We can associate the weights with the variates through the
;;;  following formatting (which is less complicated than it looks)

cat("First principal component: ",
    paste(prettyNum(v1[1:(length(v1)-1)], digits = 2), 
          pixelsVars[1:(length(v1)-1)], 
          "+"), 
    paste(prettyNum(v1[length(v1)], digits = 2), 
          pixelsVars[length(v1)])
    )

;;; 

cat("Second principal component: ",
    paste(prettyNum(v2[1:(length(v2)-1)], digits = 2), 
          pixelsVars[1:(length(v2)-1)], 
          "+"), 
    paste(prettyNum(v2[length(v2)], digits = 2), 
          pixelsVars[length(v2)])
    )

;;; 

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;;  Try fitting a multinomial model using only the first 10 pcs
;;;
;;;
;;;  Fit a multinomial model
;;;

library(MASS)
library(nnet)
pixels.pc <- data.frame(xpc, pixelType = pixelType.tr)

fitpc <- multinom(formula = pixelType ~ PC_1 + PC_2 + PC_3 + 
                                       PC_4 + PC_5 + PC_6 + 
                                       PC_7 + PC_8 + PC_9 + 
                                       PC_10, 
                          data = pixels.pc, 
                          maxit = 1000
                         )


phat <- predict(fitpc, pixels.pc[,1:length(pixelsVars)], type="probs")
types <- colnames(phat)
pred.tr <- apply(phat,1,
                     function(x){types[x==max(x)]
                                }
                       )


table(pixelType.tr, pred.tr)

;;;
;;;  On the test data
;;; 

source(web441('pixels.test'))
pixels.xte <- pixels.te[,xvars]
pixels.x01te <- (pixels.xte - 
                matrix(pixels.xbar,
                       nrow(pixels.xte), 
                       ncol(pixels.xte), 
                       byrow = TRUE)  ) /
                                matrix(pixels.sd,
                                       nrow(pixels.xte), 
                                       ncol(pixels.xte), 
                                       byrow = TRUE)


xpc.te <- as.matrix(pixels.x01te) %*% v
colnames(xpc.te) <- colnames(xpc)


phat <- predict(fitpc, xpc.te[,1:length(pixelsVars)], type="probs")
pred.te <- apply(phat,1,
                     function(x){types[x==max(x)]
                                }
                       )
table(pixelType.te, pred.te)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;;  Try to get the crimcoords
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;
;;; Get the separate groups (type of pixel)

type1.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[1],pixelsVars]
type2.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[2],pixelsVars]
type3.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[3],pixelsVars]
type4.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[4],pixelsVars]
type5.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[5],pixelsVars]
type6.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[6],pixelsVars]
type7.tr <- pixels.tr[pixels.tr[,"pixelType"]== types[7],pixelsVars]

;;;
;;; Summary statistics for each group

n1 <- nrow(type1.tr)
type1.xbar <- apply(type1.tr,2,mean)
t1.cov <- cov(type1.tr)

n2 <- nrow(type1.tr)
type2.xbar <- apply(type2.tr,2,mean)
t2.cov <- cov(type2.tr)

n3 <- nrow(type1.tr)
type3.xbar <- apply(type3.tr,2,mean)
t3.cov <- cov(type3.tr)

n4 <- nrow(type1.tr)
type4.xbar <- apply(type4.tr,2,mean)
t4.cov <- cov(type4.tr)

n5 <- nrow(type1.tr)
type5.xbar <- apply(type5.tr,2,mean)
t5.cov <- cov(type5.tr)

n6 <- nrow(type1.tr)
type6.xbar <- apply(type6.tr,2,mean)
t6.cov <- cov(type6.tr)

n7 <- nrow(type1.tr)
type7.xbar <- apply(type7.tr,2,mean)
t7.cov <- cov(type7.tr)

;;;
;;;  Get the within covariance matrix
;;;

W <- (n1 -1 ) * t1.cov + (n2 -1 ) * t2.cov + (n3 -1 ) * t3.cov +
     (n4 -1 ) * t4.cov + (n5 -1 ) * t5.cov + (n6 -1 ) * t6.cov +
     (n7 -1 ) * t7.cov

W <- W /(n1 + n2 + n3 + n4 + n5 + n6 + n7 - 7)

;;;
;;; Get the between

B <- rbind((type1.xbar - pixels.xbar), (type2.xbar - pixels.xbar),
           (type3.xbar - pixels.xbar), (type4.xbar - pixels.xbar),
           (type5.xbar - pixels.xbar), (type6.xbar - pixels.xbar),
           (type7.xbar - pixels.xbar)
           )
;;;  B is not found as t(B) %*% diag(c(n1,n2,n3,n4,n5,n6,n7)) B
;;;  The following saves a little computation (though since all n= 30 here
;;;  there is no real need here to weight the sum by its sample sizes.)
B <- t(B) %*% (B * c(n1,n2,n3,n4,n5,n6,n7))

B <- B /(length(types) - 1)

;;;
;;;  We want the eigen vectors of W^{-1}B to give the crimcoords
;;;
W_inv_B <- solve(W,B)

;;;
;;;  which fails because W_inv is near singular.
;;;  
;;; So instead, we find the Cholesky decomposition W = t(C) C and C is an
;;; upper triangular matrix

C <- chol(W)
;;;
;;; check 
sum(abs( W - (t(C) %*% C)))
;;;
;;; Do the same for the Between
D <- solve(t(C),B)
E <- solve(t(C),t(D))

eigen <- eigen(E)
;;; 
;;; no. positive real eigen values is
r <- min((length(types) - 1), length(pixelsVars))
eigvals <- eigen$values[1:r]
;;;  get rid of zero imaginary part
eigvals <- as.real(eigvals)

;;;
;;; Have a look
newwindow()
par(mfcol=c(1,2))
plot(0,0,xlim=c(1,length(eigvals)), ylim = c(0,1),
     main = "Standardized eigen values", 
     xlab = "i", ylab = "sing. val", type ="n")
points(1:length(eigvals), eigvals / max(eigvals))
lines(1:length(eigvals), eigvals/max(eigvals), lty = 2)
d2 <- eigvals
plot(0,0,xlim=c(1,length(d2)), ylim = c(0,1),
     main = "Proportion of total F", 
    xlab = "i", ylab = "Cumulative proportion", type ="n")
points(1:length(d2),cumsum(d2/sum(d2)))
lines(1:length(d2), cumsum(d2/sum(d2)), lty = 2)

;;;
;;; Crim directions taken first from
;;; eigen-vectors

eigvecs <- eigen$vectors[,1:r]
;;;
;;; Again get rid of any complex part (take care to preserve dimensions)
;;;
eigvecs <- matrix(as.real(eigvecs),nrow(eigvecs),ncol(eigvecs))

crimdirs <- matrix(as.real(solve(C,eigvecs)),nrow(eigvecs),ncol(eigvecs))

crimcoords <- as.matrix(pixels.tr[,pixelsVars]) %*% crimdirs
colnames(crimcoords)<- paste("CC",1:r,sep="_")

newwindow()
par(mfcol=c(1,1))
pairs(crimcoords, main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])

;;;
;;;  Compare to the same number of PC directions
;;;

newwindow()
par(mfcol=c(1,1))
pairs(xpc[,1:r], main = "Segmentation Data -- 7 pixel types",
            col = c("red", "blue", "darkgreen", "gray", "orange",
                    "brown", "lightgreen")[pixels.tr[,"pixelType"]])