########################################
# 2014-04-10
#
# Principal component analysis
# in higher-dimensional space:
# an image example
########################################

# use myfunctions to load an image
rm(list=ls())
par(mfrow=c(2,2))
source("scripts/myfunctions.R")
imagefile<-"labrador.ppm"
#imagefile<-"CAM00221.ppm"
M<-loadppm(paste("data/",imagefile,sep=''))

# make the number of rows larger
# than the number of columns for
# this example
if (imagefile=="labrador.ppm") {
  M<-M[,12:115]
}
if (imagefile=="CAM00221.ppm") {
  M<-M[,1:300]
}

# gray palette with gamma correction
p<-seq(1,255,1)/255
p<-p^0.5
palette<-gray(p)

res<-dim(M)
nrow<-res[1]
ncol<-res[2]

# original image
matrix_image(M,col=palette)

# Principal Component Analysis:
# here we interpret the rows
# as repeated samples
# and the columns as variables
# (observation points for brightness).
# The vector space is thus of dimension
# ncol!
# A covariance matrix can be formed (ncol by ncol)
# and its eigenvalues can be found.
res<-princomp(M)

print("total variance in the data matrix")
varall<-sum(res$sdev^2)
print(round(varall,3))

# print the variance projecting onto
# the first leading eigenvectors
print("Variance projecting onto leading eigenvectors")
for ( i in 1:10) {
  shelp<-res$sdev[i]^2
  print(paste(i," ",round(shelp,3)))
}
print("Fraction of the total variance projecting onto eigenvectors")
for ( i in 1:10) {
  shelp<-res$sdev[i]^2/varall
  print(paste(i," ",round(shelp,3)))
}

# we can illustrate how the variance
# is distributed along the eigenvector
# directions in the high-dimensional
# space
plot(1:ncol,res$sdev^2/varall,main="PCA Eigenvalues",
     xlab="Rank of Eigenvalue",ylab="fraction of total variance")

# PCA as a data compression tool:
# we use the new basis vectors (eigenvectors)
# to reconstruct the column vectors.
# For this we need the eigenvectors
# returned by princomp (res$loadings).
# As a second piece of information we need to know
# the proper scaling factors for the
# eigenvectors to reconstruct each row
# of the matrix:
# res$scores holds this information
# in an nrow by ncol matrix already!
# Note that princomp() centers the data,
# so the column means (res$center) must be
# added back to recover the original values.
# With all eigenvectors we can then reconstruct
# our image without any loss of information:
Mrec<-sweep(res$scores%*%t(res$loadings),2,res$center,"+")
matrix_image(Mrec,col=palette,main="PCA reconstructed")

# However, most of the features
# of a dog are similar from column
# to column, and if we use only
# the first few eigenvectors
# ('those that point into the
# direction of largest covariability')
# we can reconstruct our dog image
# with far fewer than all 104 (= ncol) eigenvectors.
wait<-readline("next we reconstruct the image from a growing number of eigenvectors")
irec<-20
# start from the column means, then add one
# rank-1 contribution (the outer product of a
# score column and its eigenvector) at a time
Mrec2<-matrix(res$center,nrow,ncol,byrow=TRUE)
par(mfrow=c(1,1))
shelp<-0
for ( i in 1:irec) {
  Mrec2<-Mrec2+res$scores[,i]%*%t(res$loadings[,i])
  shelp<-shelp+res$sdev[i]^2
  frac<-round(shelp/varall,3)
  title<-paste("no of eigenvalues: ",i,"; fraction of var. ",frac,sep='')
  buffer<-matrix_image(Mrec2,col=palette,main=title)
  wait<-readline("press enter or 'q' to quit")
  if (wait=='q') { break }
}
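
########################################
# Optional cross-check (a minimal sketch,
# not part of the original analysis):
# redo the PCA "by hand" with eigen() on
# the covariance matrix and compare with
# princomp(). Reuses M, nrow, ncol and res
# from above. Note that princomp() uses
# the divisor n for the covariance, while
# cov() uses n-1, hence the rescaling.
########################################
C<-cov(M)*(nrow-1)/nrow
eig<-eigen(C,symmetric=TRUE)
print("leading eigenvalue: eigen() vs princomp()")
print(round(c(eig$values[1],res$sdev[1]^2),3))
# scores are the centered data projected
# onto the eigenvectors; they agree with
# res$scores up to an arbitrary sign flip
# per component
Mc<-sweep(M,2,colMeans(M))
scores_byhand<-Mc%*%eig$vectors
d1<-max(abs(scores_byhand[,1]-res$scores[,1]))
d2<-max(abs(scores_byhand[,1]+res$scores[,1]))
print("max abs difference, first score column (up to sign):")
print(min(d1,d2))

# Rough compression bookkeeping (an assumed
# storage model, for illustration only):
# keeping k components means storing k score
# columns, k eigenvectors and the ncol column
# means, instead of the full nrow*ncol image.
k<-20
full_numbers<-nrow*ncol
compressed<-k*(nrow+ncol)+ncol
print(paste("compression ratio:",round(full_numbers/compressed,2)))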