########################################
# 2014-04-10
#
# Principal component analysis
# in higher-dimensional space:
# an image example
########################################

# use myfunctions to load an image
rm(list=ls())
par(mfrow=c(2,2))
source("scripts/myfunctions.R")
imagefile<-"labrador.ppm"
#imagefile<-"CAM00221.ppm"
M<-loadppm(paste("data/",imagefile,sep=''))

# make the number of rows larger
# than the number of columns for
# this example
if (imagefile=="labrador.ppm") {
  M<-M[,12:115]
}
if (imagefile=="CAM00221.ppm") {
  M<-M[,1:300]
}

# gray palette with gamma correction
p<-seq(1,255,1)/255
p<-p^0.5
palette<-gray(p)

res<-dim(M)
nrow<-res[1]
ncol<-res[2]

# original image
matrix_image(M,col=palette)

# Principal Component Analysis:
# here we interpret the rows
# as repeated samples
# and the columns as variables
# (observation points for brightness).
# The vector space is thus of dimension
# ncol!
# A covariance matrix can be formed (ncol by ncol)
# and its eigenvalues can be found.
res<-princomp(M)

print("total variance in the data matrix")
varall<-sum(res$sdev^2)
print(round(varall,3))

# print the variance projecting onto
# the first leading eigenvectors
print("Variance projecting onto leading eigenvectors")
for ( i in 1:10) {
  shelp<-res$sdev[i]^2
  print(paste(i," ",round(shelp,3)))
}
print("Fraction of the total variance projecting onto eigenvectors")
for ( i in 1:10) {
  shelp<-res$sdev[i]^2/varall
  print(paste(i," ",round(shelp,3)))
}

# we can illustrate how the variance
# is distributed along the eigenvector
# directions in the high-dimensional
# space
plot(1:ncol,res$sdev^2/varall,main="PCA Eigenvalues",
     xlab="Rank of Eigenvalue",ylab="fraction of total variance")

# PCA as a data compression tool:
# we use the new basis vectors (eigenvectors)
# to reconstruct the column vectors.
# For this we need the eigenvectors
# returned by princomp (res$loadings).
# As a second piece of information we need to know
# the proper scaling factors for the
# eigenvectors to reconstruct each row
# of the matrix:
# res$scores holds this information
# in an nrow by ncol matrix already!
# Note that princomp() centers the data,
# so the column means (res$center) must be
# added back to recover the original values.
# With all eigenvectors we can then reconstruct
# our image without any loss of information:
Mrec<-sweep(res$scores%*%t(res$loadings),2,res$center,"+")
matrix_image(Mrec,col=palette,main="PCA reconstructed")

# However, most of the features
# of a dog are similar from column
# to column, and if we use only
# the first few eigenvectors
# ('those that point into the
# direction of largest covariability')
# we can reconstruct our dog image
# with far fewer than all 104 (= ncol) eigenvectors.
wait<-readline("next we reconstruct the image from a growing number of eigenvectors")
irec<-20
# start from the column means, then add one
# rank-1 contribution (the outer product of a
# score column and its eigenvector) at a time
Mrec2<-matrix(res$center,nrow,ncol,byrow=TRUE)
par(mfrow=c(1,1))
shelp<-0
for ( i in 1:irec) {
  Mrec2<-Mrec2+res$scores[,i]%*%t(res$loadings[,i])
  shelp<-shelp+res$sdev[i]^2
  frac<-round(shelp/varall,3)
  title<-paste("no of eigenvalues: ",i,"; fraction of var. ",frac,sep='')
  buffer<-matrix_image(Mrec2,col=palette,main=title)
  wait<-readline("press enter or 'q' to quit")
  if (wait=='q') { break }
}
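
########################################
# Optional cross-check (a minimal sketch,
# not part of the original analysis):
# redo the PCA "by hand" with eigen() on
# the covariance matrix and compare with
# princomp(). Reuses M, nrow, ncol and res
# from above. Note that princomp() uses
# the divisor n for the covariance, while
# cov() uses n-1, hence the rescaling.
########################################
C<-cov(M)*(nrow-1)/nrow
eig<-eigen(C,symmetric=TRUE)
print("leading eigenvalue: eigen() vs princomp()")
print(round(c(eig$values[1],res$sdev[1]^2),3))
# scores are the centered data projected
# onto the eigenvectors; they agree with
# res$scores up to an arbitrary sign flip
# per component
Mc<-sweep(M,2,colMeans(M))
scores_byhand<-Mc%*%eig$vectors
d1<-max(abs(scores_byhand[,1]-res$scores[,1]))
d2<-max(abs(scores_byhand[,1]+res$scores[,1]))
print("max abs difference, first score column (up to sign):")
print(min(d1,d2))

# Rough compression bookkeeping (an assumed
# storage model, for illustration only):
# keeping k components means storing k score
# columns, k eigenvectors and the ncol column
# means, instead of the full nrow*ncol image.
k<-20
full_numbers<-nrow*ncol
compressed<-k*(nrow+ncol)+ncol
print(paste("compression ratio:",round(full_numbers/compressed,2)))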