# 2014-4-16 # Multiple Linear Regression # Introduction rm(list=ls()) # reset the plot window to full sized plot par(mfrow=c(1,1)) source("scripts/loadano2.R") source("scripts/myfunctions.R") # note this logical variable will allow us # to calculate the correlation matrix iscor<-FALSE ############################################### # Select here different months Jan - Dec ############################################### month<-"Jun" varname<-"tavg" start<-1950 end<-2012 # sample size, used to define matrix n<-end-start+1 stationlist<-c("USW00014735","USW00014750","USW00014771","USW00094728") # variables (stations), used to define matrix m<-length(stationlist) # define a n x m matrix filled with NA entries X<-matrix(NA,n,m) ##################################################### # here I form a loop to repeat the loading of the # data for the stations given in stationlist ##################################################### for ( i in 1:m) { buffer<-loadano2(station=stationlist[i],state="NY",month=month,start=start,end=end) mhelp<-mean(buffer$ano) # now I assign the centered data vector # into the column i of the matrix if (iscor) { # standardize to get the correlation coefficients # NOTE: standard deviation function is using 1/(n-1) # factor, but below we work with 1/n # That's why sqrt(n-1/n) shelp<-sd(buffer$ano)*sqrt((n-1)/n) X[,i]<-(buffer$ano-mhelp)/shelp } else { # default is the covariance will be calculated # from centered data X[,i]<-(buffer$ano-mhelp) } } # the function t() is forming the transpose of a matrix print("size of the data matrix X: ") print(dim(X)) print("size of the transpose of data matrix X: ") print(dim(t(X))) # the matrix multiplication # %*% is the operator for matrix multiplication C<-t(X)%*%X # multiply the matrix with factor 1/n # to get the covariance (correlation matrix) C<-1/n*C if (iscor) { print("size of the correlation matrix: ") print(dim(C)) print ("Correlation Matrix") print(C) } else { print("size of the Covariance matrix: ") print(dim(C)) print ("Covariance Matrix") print(C) } # The variance for the four variables (stations) # are stored on the diagonal # (top left down to bottom right) vsum<-0 for ( i in 1:m){ print(paste("Variance for ",stationlist[i]," : ",round(C[i,i],3),sep='')) vsum<-vsum+C[i,i] } print("---------------------------------------") print(paste("sum of variances : ",round(vsum,3)),sep='') ################################################### # create an empty plot with myplot() # function from myfunctions.R (new 2014-04-10) ################################################### xrange<-c(-5,5) yrange<-xrange time<-buffer$time plot(time,X[,1],pch=1,col=3,lwd=4,xlab="year",ylab="tavg ano [deg C]",typ='b') wait<-readline("press enter to start multiple linear regression with lm()") # Let's sse how good we can fit Albany temperature anomalies # with the temp. anomalies from the other three stations y<-X[,1] # first I use only one station res<-lm( y ~ X[,2:2]) b1fit<-res$coeff[2] print(summary(res)) lines(time,res$fitted,col="blue") wait<-readline("press enter to start multiple linear regression with lm()") # Let's sse how good we can fit Albany temperature anomalies # with the temp. anomalies from the other three stations y<-X[,1] # now I add a fit with another single station res<-lm( y ~ X[,3:3]) b2fit<-res$coeff[2] print(summary(res)) lines(time,res$fitted,col="cyan") wait<-readline("press enter to start multiple linear regression with lm()") # # now the MLR fit with another single station res<-lm( y ~ X[,2:3]) print(summary(res)) lines(time,res$fitted,col="black") b1mlrfit<-res$coeff[2] b2mlrfit<-res$coeff[3] print ("compare fitted regression parameters:") print (b1fit) print (b2fit) print (b1mlrfit) print (b2mlrfit) print("class20a finished")