#
# Tutorial introduction to
# the PairViz R package
#  ... Wayne Oldford 
#      (Waterloo, Canada)
#    & Catherine Hurley
#      (Maynooth, Ireland)


require(PairViz)

# See some stock demos
demo(package="PairViz")

#  PairViz is all about finding an
#  ordering of objects.
#
help(overview, package="PairViz")

#  For example, suppose we have n
# objects (labelled 1 to n), say

n <- 9

#  We may compute a sequence of these
#  numbers so that every pair appears
#  in the sequence once.

eseq(n)
#
# An alternative algorithm is also available
# simply named eseqa ("a" for "alternative")
eseqa(n)
#  which gives a different, independent,
#  sequence.
#
#  Such sequences are accomplished by
#  regarding the n objects as distinct
#  vertices (or nodes) on a complete
#  graph K_n.
#  
#   This can be made more explicit as

g <- mk_complete_graph(n)
g
dev.new(width=4,height=4)

# which can be viewed (via Rgraphviz package)

require(Rgraphviz)
plot(g, 
     main=paste("The complete graph on", 
     n, "nodes"))
     
plot(g, "circo", # arrange nodes in a circle
     main=paste("The complete graph on", 
     n, "nodes"))

eseq(n)  # walks the graph visiting every
         # edge once.
# this is possible iff the graph is
# "Eulerian" which occurs iff it is even
# i.e. every node has an even number of
# edges.  A complete graph on an odd number
# of nodes does.   
#
# Note that the same sequence could be
# achieved by operating directly on the
# graph
eulerian(g)

# which returns the labels of the node
# in order of the sequence.
# Eulerian can also be applied to the
# number n itself.
eulerian(n)

# To view the order, simply plot it as
dev.new(width=6, height=4)
plot(eulerian(n), type ="b", 
     col=rainbow(n)[eulerian(n)], 
     pch=19)

# Note that the sequence starts and ends at
# one.  In this way, the sequence is an 
# Eulerian "tour" in that it returns to the
# start.
#
# Notice also that node 1 appears frequently
# early in the sequence.
#
# We can ensure that all nodes appear in each
# in the first n indices, the second, etc.
# by using a Hamiltonian decomposition of the
# Eulerian.
hd <- hpaths(9)
hd

# As can be seen this returns a matrix where
# each row itself visits EVERY node of the
# graph ... a Hamiltonian tour.
# Each row is a different Hamiltonian tour.
hd[1,]
hd[2,]
hd[3,]
hd[4,]
# AND NO TWO numbers are beside each other
# more than once!
# The collection of these tours is called a
# Hamiltonian decomposition (of an Eulerian
# tour).
#
# The entire Hamiltonian decomposition can
# be produced as a vector simply as

hdv <- hpaths(9, matrix=FALSE)
hdv
#
# Compare this to the Eulerian we started
# with
dev.new(width=6, height=8)
par(mfrow=c(2,1))

plot(eulerian(n), type ="b", main="Eulerian", 
     col=rainbow(n)[eulerian(n)], 
     pch=19)
abline(v=c(9,18,27,36),col="grey",lty=2,lwd=2)

plot(hdv, type ="b", main="Hamiltonian", 
     col=rainbow(n)[hdv], 
     pch=19)
abline(v=c(9,18,27,36),col="grey",lty=2,lwd=2)

# Note that the mapping of nodes to numbers
# is arbitrary.  If there was a preferred
# order for the first Hamiltonian, we could
# enforce that by simply renumbering the
# nodes as appropriate.
# PairViz provides a function to accomplish
# this.

hd2 <- permute_hpaths(1:n,paths=hd)
hd2[1,]
hd2[2,]
hd2[3,]
hd2[4,]

# The first argument is the preferred sequence,
# in this case simply 1 to n.  Could have been
# the numbers 1 to n in ANY order.

# Can also be returned as a vector:
hd2v <- permute_hpaths(1:n,paths=hd,
                       matrix=FALSE)
                       
# Compare the two
dev.new(width=6, height=8)
par(mfrow=c(2,1))

plot(hdv, type ="b", 
     main="Original Hamiltonian Decomposition", 
     col=rainbow(n)[hdv], 
     pch=19)
abline(v=c(9,18,27,36),col="grey",lty=2,lwd=2)

plot(hd2v, type ="b", 
     main="Permuted Hamiltonian", 
     col=rainbow(n)[hd2v], 
     pch=19)
abline(v=c(9,18,27,36),col="grey",lty=2,lwd=2)

# Note:
# This is possible because the COMPLETE
# graph on an ODD number of nodes is an EVEN
# graph and hence an Eulerian exists.
#
# If n is EVEN, then the graph is NOT EVEN
# and NO Eulerian exists. 
#
# It is known however, that we can still
# have a Hamiltonian decomposition of
# a complete graph on an even number of
# vertices.
#
# In this case, the Hamiltonians are paths,
# not cycles and so do NOT return to their
# origin.

hd8 <- hpaths(8,matrix=FALSE)
hd8[1,]
hd8[2,]
hd8[3,]
hd8[4,]

#
# As a vector
#
hd8v <- hpaths(8,matrix=FALSE)
#
# And look at paths:
dev.new(width=6, height=4)
plot(hd8v, type ="b", 
     col=rainbow(8)[hd8v], pch=19,
     main="Decomposition into paths")
abline(v=c(8,16,24,32),col="grey",lty=2,lwd=2)
#
# Note that some pairs are repeated if these
# paths are joined.
#
# Or make sure each path starts at the same
# node (i.e. could cycle back there):
#
hd8c <- hpaths(8,cycle =TRUE)
hd8c[1,]
hd8c[2,]
hd8c[3,]
hd8c[4,]

#
# As a vector
#
hd8cv <- hpaths(8,cycle =TRUE,matrix=FALSE)
plot(hd8cv, type ="b", 
    col=rainbow(8)[hd8cv], 
    pch=19,
    main="Decomposition into returning paths")
abline(v=c(8,16,24,32),col="grey",lty=2,lwd=2)

  
# Compare the two
dev.new(width=6, height=8)
par(mfrow=c(2,1))

plot(hd8v, type ="b", 
     col=rainbow(8)[hd8v], pch=19,
     main="Decomposition into paths")
abline(v=c(8,16,24,32),col="grey",lty=2,lwd=2)


plot(hd8cv, type ="b", 
     main="Decomposition into returning paths", 
     col=rainbow(n)[hd8cv], 
     pch=19)
abline(v=c(8,16,24,32),col="grey",lty=2,lwd=2)

# Joining these paths effectively added n/2 
# edges to the graph (repeated n/2 edges).
# The result is an EVEN graph and so an Eulerian
# exists:

eseq(8)

plot(eseq(8), type ="b", 
     col=rainbow(8)[eseq(8)], pch=19,
     main="Eulerian on extended complete graph")
abline(v=c(8,16,24,32),col="grey",lty=2,lwd=2)


plot(hd8cv, type ="b", 
     main="Decomposition into returning paths", 
     col=rainbow(n)[hd8cv], 
     pch=19)
abline(v=c(8,16,24,32),col="grey",lty=2,lwd=2)

# 
# There are functions in PairViz to force the
# graph to be even.
# 
k8 <- mk_complete_graph(8)
k8e <- mk_even_graph(k8)

dev.new(width=6, height=8)
par(mfrow=c(2,1))
plot(k8,"circo",main="Complete graph on 8 nodes: K_8")

plot(k8e,"circo",main="Extending the complete graph on 8 nodes: K_8e")

# Note 4 extra edges: (1,2), (3,8), (4,7),(5,6)
#
# mk_even_graph will work on any graph.
#
##########
#
#  What if the edges had weights?
#
#  Example, distances between European cities
#  
labels(eurodist)
dist <- eurodist

# which is an R distance matrix.
#
# Number of cities is 
ncities <- length(labels(dist))

# We may, choose a default Eulerian on this as
#
eDefault <- eseq(ncities)

# Or we might use the distances somehow to
# get a preferred ordering.
#
# The edge weights corresponding to the 
# distances can be determined for any path
# using path_weights(...)
#
# E.g.
path <- c(1, 2, 4, 1)
path_weights(dist,path)
# contains 3 distances 1 -> 2, 2 -> 4, 4 -> 1.
#
# All of the edge weights 
# (for the complete graph) are had by

dist2edge(dist)

# in some order (not necessarily following any
# specific path)

# PairViz offers a "greedy" Eulerian
# which prefers wherever possible to put the
# smallest edges first.  
#
# If given a distance matrix as its argument,
# these distances are used as edge weights.
#

eGreedy <- eulerian(dist)

# The effect of this greedy Eulerian over the
# default (which ignores weights) can be seen
# by looking at the path_weights for each.
path_weights(dist, eDefault)

path_weights(dist, eGreedy)

# The greedy method tries to front load the path
# with small weights.
#
# So if we didn't want the whole path, we might
# just take a partial path, say,
eGreedy[1:20]

# so as to have small weights, 
path_weights(dist, eGreedy[1:20])

# compared to
path_weights(dist, eDefault[1:20])

#
# Visually, 

plot(path_weights(dist, eDefault), type ="l", 
     ylab="Edge distance",
     col="grey70",
     main="Default Eulerian")
points(path_weights(dist, eDefault), 
     pch=21, 
     col="steelblue")
     
plot(path_weights(dist, eGreedy), type ="l", 
     ylab="Edge distance",
     col="grey70",
     main="Greedy Eulerian")
points(path_weights(dist, eGreedy), 
     pch=21, 
     col="steelblue")

#     
# Note that the algorithm is "greedy" and so
# does not necessarily get the smallest
# distances in the first subset.
#
#
#
#  What about Hamiltonians taking advantage
#  of the weights?
#
#  Also have a weighted hpaths function

hdw <- weighted_hpaths(dist)
hdw[1,]
hdw[2,]
hdw[3,]
#
# etc.
# This is effected by first solving a
# travelling salesman problem, using that
# as the first hamiltonian, then ordering
# the remaining hamiltonians in the
# decomposition in order of increasing total
# path_weight.

hdwv <- weighted_hpaths(dist,matrix=FALSE)

plot(path_weights(dist, hpaths(ncities,matrix=FALSE)), type ="l", 
     ylab="Edge distance",
     col="grey70",
     main="Default Hpaths")
points(path_weights(dist, hpaths(ncities,matrix=FALSE)), 
     pch=21, 
     col="steelblue")
     
plot(path_weights(dist, hdwv), type ="l", 
     ylab="Edge distance",
     col="grey70",
     main="Weighted Hamiltonian Decomposition")
points(path_weights(dist, hdwv), 
     pch=21, 
     col="steelblue")
     
#
# And compare to Greedy Eulerian
#
plot(path_weights(dist, eGreedy), type ="l", 
     ylab="Edge distance",
     col="grey70",
     main="Greedy Eulerian")
points(path_weights(dist, eGreedy), 
     pch=21, 
     col="steelblue")
     
plot(path_weights(dist, hdwv), type ="l", 
     ylab="Edge distance",
     col="grey70",
     main="Weighted Hamiltonian Decomposition")
points(path_weights(dist, hdwv), 
     pch=21, 
     col="steelblue")

# Aside, there seems to be a mistake in 
# the weighted hpaths in that matrix=TRUE or FALSE
# does not seem to return the same
# ordering, however this is not true.
# Each Hamiltonian can be cycled to have the joins
# take place at any node in the cycle 
# (e.g. 3 or 18)
#
#  
#
#########################################################
# 
# So, what could we do with this functionality?
#
# Could use it to order components in a plot layout
#   1. To lessen the negative effects of
#      pairwise order
#   2. To better see interesting features.
#
#########################################################
# 
#
#  1. Star glyphs
# 
#  Motor trend cars data:
#

mt7 <- as.matrix(mtcars)[,1:7]

# Default order
# 
col <- "grey"

dev.new(width=5,height=5)
stars(mt7, len = 0.8,
      col.stars=rep(col, nrow(mt7)),
      key.loc = NULL,
      label=1:nrow(mt7),
      cex=.6,
      radius = FALSE,
      main="Default order")
#      
# get different orders
h <- hpaths(7)

dev.new(width=5,height=5)
col <- "thistle"
stars(mt7[,h[1,]], len = 0.8,
      col.stars=rep(col, nrow(mt7)),
      key.loc = NULL,
      label=1:nrow(mt7),
      cex=.6,
      radius = FALSE,
      main="First Hamiltonian order")

dev.new(width=5,height=5)
col <- "seagreen"
stars(mt7[,h[2,]], len = 0.8,
      col.stars=rep(col, nrow(mt7)),
      key.loc = NULL,
      label=1:nrow(mt7),
      cex=.6,
      radius = FALSE,
      main="Second Hamiltonian order")
      
#  Perception of grouping may change with ordering.
#  Better to have all pairs appear at once.
#

hv <- hpaths(7,matrix=FALSE)
dev.new(width=5,height=5)
col <- "steelblue"
stars(mt7[,hv], len = 0.8,
      col.stars=rep(col, nrow(mt7)),
      key.loc = NULL,
      label=1:nrow(mt7),
      cex=.6,
      radius = FALSE,
      main="Default Hamiltonian decomposition")
      
estars <- eseq(7)
dev.new(width=5,height=5)
col <- "steelblue"
stars(mt7[, estars], len = 0.8,
      col.stars=rep(col, nrow(mt7)),
      key.loc = NULL,
      label=1:nrow(mt7),
      cex=.6,
      radius = FALSE,
      main="Default Eulerian")
      
#
# In both cases, all pairs appear.
#
#  
#  How about weights?
#
mtr <- cor(mt7,method="spearman")
mtre <- eulerian(-mtr)
mtre
ord <- mtre[-length(mtre)] # remove the last element
                           # not needed for radial axes
                        

dev.new(width=5,height=5)
col <- "salmon"
stars(mt7[, ord], len = 0.8,
      col.stars=rep(col, nrow(mt7)),
      key.loc = NULL,
      label=1:nrow(mt7),
      cex=.6,
      radius = FALSE,
      main="Greedy Spearman rho Eulerian")
 
# Emphasizes positive monotonic relations by putting the
# variables side by side and early in the sequence.
# 
# Could order the positions as well
# to see how much visual corresponds to 
# clustering algorithm.  Here we place them
# on a dendrogram from average linkage clustering.
#

dev.new(width=10,height=5)
h <- hclust(dist(scale(mt7)), "average")
plot(h,main="",xlab="",cex=.7,cex.lab=1,axes=FALSE,labels=NULL)
axis(2,cex.axis=.7)

stars(mt7[h$order,ord], col.stars=rep(col, nrow(mt7)),
locations=cbind((1:nrow(mt7)),
rep(-3,nrow(mt7))),
      main = "",radius = FALSE,mar=c(0,3,0,0),labels=NULL,len=.65,add=TRUE)



##########
#
#  2. Parallel coordinates
#
#

#
# Sleep data setup
# sleep measurements on various mammals
# Data found in "alr3" package
#
require(alr3)
data(sleep1)
#
# First set up for our purposes
#
# Remove NAs
data <- na.omit(sleep1)

# Use logs on brain and body weights

data[,4:5] <- log(data[,4:5])

#
# Shorter variable names

colnames(data) <- c("SW","PS" ,"TS" ,"Bd","Br",
						          "L","GP","P" ,"SE" , "D"  )

#
# default parallel coordinate plot
parcoord(data)

# Separate colours for points
# PairViz provides a function to reduce the saturation
# of any colour
cols3 <- desaturate_color(c("red","navy","lightblue3"),
											       .6)
#
# Get as many cols as we need, dividing data into
# three groups based on the maximum lifespan "L" or 6
#
cols <- cols3[cut(rank(data[,"L"]),
	                3,labels=FALSE)] 


parcoord(data,col=cols,lwd=1.2,main="default order")

#
# Get Hamiltonian decomposition
hpc <- hpaths(ncol(data), matrix=FALSE)
#
# Use it.
parcoord(data[,hpc],col=cols,lwd=1.2,
					 main="Hamiltonian Decomposition")
					 
#
# That simple. Now all pairs appear.
# But looks a mess. 
# And this data has only 10 variables.
# (Note some pairs are repeated.)
#

#  
#  How about weights?
#
slcor <- cor(data,method="spearman")
slcore <- eulerian(-slcor)
slcore

parcoord(data[,slcore],col=cols,lwd=1.2,
					 main="Greedy Eulerian: Spearman's Rho")
					 
parcoord(data[,
              slcore[1:(length(slcore)/2)]],
         col=cols,lwd=1.2,
					 main="First half of Greedy Eulerian: Spearman's Rho")

# Straightforward use of standard.
#
# PairViz also puts forward a "guided parallel coords"
# plot.
#
help(guided_pcp)

#
#  Need to get the edge weights.
#  

corwts <- dist2edge(slcor)
#
# Separate weights into positive and negative
#
edgew <- cbind(corwts*(corwts>0), corwts*(corwts <0))


dev.new(width=9.5,height=3)
par(tcl = -.2, cex.axis=.45,mgp=c(3,.3,0),cex.main=.8)

guided_pcp(data, edgew, path= slcore,
						 pcp.col=cols, lwd=1.4,
           bar.col = desaturate_color(c("blue","orange"),
                                      .4),
           main="Eulerian on Spearman correlation",
           bar.ylim=c(-1,1),
           bar.axes=TRUE)
#
# Focus on first 26
#

dev.new(width=9.5,height=3)
par(tcl = -.2, cex.axis=.45,mgp=c(3,.3,0),cex.main=.8)

guided_pcp(data, edgew, path= slcore,
						 pcp.col=cols, lwd=1.4,
           bar.col = desaturate_color(c("blue","orange"),
                                      .4),
           main="Eulerian on Spearman correlation",
           bar.ylim=c(-1,1),
           bar.axes=TRUE,
           zoom=1:26)
#
# Focus on any contiguous subsequence
#
sub <- 20:40

guided_pcp(data, edgew, path= slcore,
						 pcp.col=cols, lwd=1.4,
           bar.col = desaturate_color(c("blue","orange"),
                                      .4),
           main="Eulerian on Spearman correlation",
           bar.ylim=c(-1,1),
           bar.axes=TRUE,
           zoom=sub)

#
# Can find the "best" path
# The following will take a while
# o <- find_path(-as.dist(slcor),order_best,maxexact=10)
#
# Answer is 
o <- c(6,  5,  4,  7,  9, 10,  8,  1,  3,  2)

# device set up
#
dev.new(width=6.5,height=3)
par(tcl = -.2, cex.axis=.45,mgp=c(3,.3,0))

guided_pcp(data, edgew, path= o,
						 pcp.col=cols, lwd=1.4,
           bar.col = desaturate_color(c("blue","purple"),
                                      .5),
           main="Best Hamiltonian on Spearman correlation",
           bar.ylim=c(-1,1),
           bar.axes=TRUE)
#
#
# More interesting are the use of scagnostics
# or "Scatterplot diagnostics/cognostics"
# from the scagnostic package.
require(scagnostics)
library(scagnostics)
library(RColorBrewer)

#_________utility functions__________

pc_scag_title <- function(title="Parallel Coordinates",sel_scag)
 {if (length(sel_scag)>1) 
   {if (length(sel_scag)==length(scags))
      {title <- paste(title, 
      	               "on all scagnostics.", 
      	               sep=" ")
   	} else {title <- paste(title, 
      	               "on scagnostics:", 
      	               sep=" ")
   		     for (scag in sel_scag) {
	         title <- paste(title,scag, sep=" ")}}
    } else {title <-  	paste(title, 
      	               "on scagnostics:", 
      	               sep=" ")
      	      title <- paste(title,sel_scag, sep=" ")}

	 title}



select_scagnostics <- function(sc,names){
	sc1 <- sc[,names]
	class(sc1) <- class(sc)
	return(sc1)
	}

#___________________


# The base data for finding weights.
sc <- t(scagnostics(data))

# Scag names and colours
# These are preserved throughout in this order.
scags <- colnames(sc)

#
# There are 9 different scagnostic measures
# Assign a colour to each.
#
scag_cols <- rev(brewer.pal(9, "Pastel1"))

names(scag_cols) <- scags

# ----scagnostics legend --------------

dev.new(width=1.5,height=3)
par(mar=c(0,4.5,0,.5))

barplot(rep(1,9),col=scag_cols,
				horiz=TRUE,space=0,
				axes=FALSE,names.arg=scags,
				las=2,cex.names=.8)
#____________________________________________
# 
# Layout to find outliers
#
sel_scag <- c("Outlying")

sc1 <- select_scagnostics(sc,sel_scag)

# Following gets best Hamiltonian; takes a while.
# o <- find_path(-sc1, order_best, maxexact=10)
# answer is
o <-c( 2 , 4 , 1,  5 , 6,  7 , 3 , 8,  9, 10) 


dev.new(width=6.5,height=3)
par(tcl = -.2, cex.axis=.8,mgp=c(3,.3,0))

guided_pcp(data, sc1, path= o,
						 pcp.col=cols, lwd=1.4,
           main=pc_scag_title("Best Hamiltonian",
                              sel_scag),
           bar.col = scag_cols[sel_scag],
           legend=FALSE,
           bar.axes=TRUE,
           bar.ylim=c(0,.6)
           )


#____________________________________________
# 
# Layout to find clusters
#
sel_scag <- c("Clumpy")

sc1 <- select_scagnostics(sc,sel_scag)
# o <- find_path(-sc1, order_best,maxexact=10)
o <- c(5 , 9,  2,  7 , 3 , 1 , 8,  6, 10,  4) # "clumpy"

dev.new(width=6.5,height=3)
par(tcl = -.2, cex.axis=.8,mgp=c(3,.3,0))

guided_pcp(data, sc1, path= o,
						 pcp.col=cols, lwd=1.4,
           main=pc_scag_title("Best Hamiltonian",
                              sel_scag),
           bar.col = scag_cols[sel_scag],
           legend=FALSE,
           bar.axes=TRUE,
           bar.ylim=c(0,.6)
           )


#____________________________________________
#
#  Striated and/or Sparse
#

sel_scag <- c("Striated","Sparse")

sc1 <- select_scagnostics(sc,sel_scag)
# o <- find_path(-sc1,   order_best,maxexact=10)
o <-  c(4, 10 , 2 , 9 , 1,  7 , 8,  6 , 5 ,3) #most"Sparse"+ Striated"


dev.new(width=6.5,height=3)
par(tcl = -.2, cex.axis=.8,mgp=c(3,.3,0))

guided_pcp(data, sc1, path= o,
						 pcp.col=cols, lwd=1.4,
           main=pc_scag_title("Best Hamiltonian",
                              sel_scag),
           bar.col = scag_cols[sel_scag],
           legend=FALSE,
           bar.axes=TRUE,
           bar.ylim=c(0,.6)
           )
#____________________________________________
#
#
#

##########
#
#  3. Multiple Comparisons
#
#
# Get the right cancer data
data(cancer, package="PairViz")

cancer

#
# Prepare data
#
bx <- with(cancer, split(sqrt(Survival),Organ))
#
# Analysis of Variance
#
a <-  aov(sqrt(Survival) ~ Organ,data=cancer)

# Standard simultaneous confidence intervals
# (Using Tukey's honest significant differences.)
#
dev.new(height=5, width=4)
par(mar=c(3,4.8,2,1),cex.main=.75,
    mgp=c(1.5, .5, 0),cex.axis=.6,cex.lab=.75)
    
plot(TukeyHSD(a,conf.level = 0.95),las=1,tcl = -.3)
abline(v = 0, col = "grey60", lty=3)
#
# More recently proposed method
#

library(HH)
mm <- glht.mmc(a, linfct = mcp(Organ = "Tukey"))

dev.new(height=6.5, width=9)
par(mgp=c(2, .5, 0),mar=c(3,4,3,8))

plot(mm, x.offset=1,
     main="95% family-wise confidence level",
     main2.method.phrase="",
     xlab = "Differences in mean sqrt Survival",
     col.mca.signif="red")
     
par(mgp=c(3, .5, 0))
title(ylab="Mean sqrt Survival")

#
# Now PairViz approached based on ordering.
#

#
# Some nice colours (predetermined)

cols <- c("#FF8080" ,"#8080FF", 
          "#80FFFF" ,"#EEEE77", 
          "#FFDAE2")

dev.new(height=4.5, width=9.5)
par(cex.axis=.75, cex.main = 1.0, cex.lab=1)
par(mar=c(3,5,3,5))


mc_plot(bx,a,
				 main="Pairwise comparisons of cancer types", 
				 ylab="Sqrt Survival",col=cols)


##########
#
#  4. Other plots, other graphs.
#
#

# Stepwise regression
#
# A special graph needs to be made
# based on a hypercube.
#
# e.g.
g <- mk_hypercube_graph(c("A","B","C"))
dev.new()
plot(g)

#
#  Here's a nice function to handle stepwise
#  regression fits.
#  Creates a graph, fits all models, and attaches
#  results to nodes.

regression_graph <- function(y,x)
  {
  	g <- mk_hypercube_graph(colnames(x)) 
  	nodeDataDefaults(g, "residuals") <- 0
  	nodeDataDefaults(g, "sse") <- 0
   edgeDataDefaults(g,"weight") <- 1

  
    preds <- nodes(g)
    res <-sapply(preds,function(z) {
      p <- strsplit(z,character(0))[[1]]
	  if (p[1]=="0") 
	     r <- y - mean(y)
	  else {d <- as.data.frame(x[,p])
	  r <- lm(y ~., data=d)$residuals}
	  
	  return(r)})
	  
	  
    colnames(res) <- preds
    for (i in 1:ncol(res)){
      r <- res[,i]
      nodeData(g,nodes(g)[i],"residuals"	) <- list(r)
      nodeData(g,nodes(g)[i],"sse"	) <- sum(r*r)
      }
    for (n in nodes(g)) {
    	m <- edges(g,n)[[1]]
    	ew <- unlist(nodeData(g,m,"sse")) - nodeData(g,n,"sse")[[1]]
    	edgeData(g,n,m,"weight") <- abs(ew)
    	}  	

    return(g)
  	}
# 	
#
# Let's look again at the sleep1 data.
# Stored previously as data
#
# We'll set it up again using fewer variables
# and hopefully fewer NAs
#

library(alr3)
data(sleep1)
data <- sleep1
# logging the brain and body weights
data[,c(4,5)] <- log(data[,c(4,5)])
# Shorter variable names.
colnames(data) <- c("SW", "PS", "TS", "Bd",
                    "Br", "L", "GP", "P", 
                     "SE", "D")

#data <- na.omit(data) # before
#  Take fewer variables now, then na,omit
data <- data[,c(1,2,4,5,6)]
data <- na.omit(data)

#  Response will be Log Brain Weight (now 4th)
y <- data[,"Br"]

# Everything else will be explanatory.
x <- data[,-4]
#
# regression graph wants single letter variables
colnames(x) <-c("A","B","C","D")

g <- regression_graph(y,x)
dev.new()
plot(g)
#
# Note this happens to be an even graph.
#

res <- sapply(nodeData(g,attr="residuals"),identity) # matrix of residuals

# g is  eulerian, since it is Q4
eulerian(g,weighted=FALSE)

eulerian(g,weighted=TRUE)

# This version uses weights, and picks as a 
# start a node connected to the loweset weight edge,
# which happens to be node "ACD"
# To begin at the full model, as in a backwards 
# elimination, we specify start as "ABCD"

o <- eulerian(g,start="ABCD")
o
#
# Get the edge weights.

ew <- NULL	
for (i in 2:length(o)) {
	ew <- c(ew,
         nodeData(g,o[i],"sse")[[1]] - nodeData(g,o[i-1],"sse")[[1]])}
         
         
#
# Set up device
#
dev.new(width=8,height=3)
par(tcl = -.2, cex.axis=.4,
    mgp=c(3,.3,0),cex.main=.8)


pcp0 <- function(...){pv_pcp(...)
	                    abline(h=0,col="grey70",lwd=2)}

cols <- desaturate_color(rainbow(10,alpha=0.7))
cols <- cols[cut(rank(res[,"ABCD"]),10,labels=FALSE)]

#
# Can now call guided_pcp.
#  First need to get a corrected one
# to properly handle scales
#
source("guidedpcp.R")

guided_pcp(res,path=match(o,nodes(g)),
           scale=FALSE, pathw=ew,
           bar.col="#FDCDAC",
           pcpfn=pcp0, lwd=2, pcp.col=cols,
           main="Sleep data: Model residuals.",
           pc.mar=c(1,1,1.5,1))

#
# Use colour to identify outliers
#

cols <- rep("grey70",nrow(data))
cols[rownames(data) =="Human"] <- "red"
cols[rownames(data) =="Asian_elephant"  ] <- "black"
cols[rownames(data) =="Big_brown_bat"    ] <- "purple"
cols[rownames(data) =="Little_brown_bat"      ] <- "purple"
cols[rownames(data) =="Echidna"       ] <- "blue"
cols[rownames(data) =="Lesser_short-tailed_shrew"] <- "cyan"
cols[rownames(data) =="Ground_squirrel"  ] <- "magenta"

#
# Can now call guided_pcp

guided_pcp(res,path=match(o,nodes(g)),
           scale=FALSE, pathw=ew, bar.col="#FDCDAC",
           pcpfn=pcp0, lwd=2, pcp.col=cols,
           main="Sleep data: Model residuals.",
           pc.mar=c(1,1,1.5,1))

######
#
#  There are several other plots in PairViz
#  See demos. e.g. interaction plots
#             categorical parallel coords
#             table plots
#
#  Also there are several graph constructors.
#
#  Bipartite graphs
g <- bipartite_graph(LETTERS[1:4],1:5)
dev.new()
plot(g)

#
# Graph complement
#
plot(complement(g))

#
# Line Graphs
# 
plot(mk_line_graph(g),"circo")
plot(complement(mk_line_graph(g)), "circo")

# Other functions include
# mk_complete_graph(...)
# kspace_graph(...)
plot(kspace_graph(5,3,2),"circo")

# k nearest neighbour graphs of graphs
# knn_graph(...)
#
# iterated_line_graph(...)
# graph_product(...)
# graph_compose(...)
# dn_graph(...)
# Any graphNEL graph from the graph package.
#
########