# 2014-02-24 # looking at histograms once more # using Albany and NY Central Park # monthly mean temperature anomalies # (units degree C) # use function loaddata() from loaddata.R source("scripts/loaddata.R") buffer<-loaddata() # buffer[,3] extracts the column #4 (a vector) (Albany) # buffer[,4] extracts the column #5 # and calculates the histogram # I define my own breakpoints # for the histogram xb<-c(-9,-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9) x1<-buffer[,3] x2<-buffer[,4] h1<-hist(x1,breaks=xb,plot=FALSE) h2<-hist(x2,breaks=xb,plot=FALSE) print("returned histogram object has names:") print(names(h1)) res<-readline("press enter to plot the histgrams: ") plot(c(-8,8),c(0,250),typ='n',xlab='anomaly [C]',ylab='count') lines(h1$mid,h1$count,typ='l',col=1) lines(h2$mid,h2$count,typ='l',col=4) res<-readline("press enter to plot the same histograms: but using 'density'") plot(c(-8,8),c(0,0.5),typ='n',xlab='anomaly [C]',ylab='density') lines(h1$mid,h1$density,typ='l',col=1) lines(h2$mid,h2$density,typ='l',col=4) text(-8,0.5,"Albany",col=1,adj=0) text(-8,0.47,"Central Park",col=4,adj=0) xm1<-mean(x1) xm2<-mean(x2) sd1<-sd(x1) sd2<-sd(x2) print(paste("mean and standard deviation of x1:",round(xm1,2),round(sd1,2))) print(paste("mean and standard deviation of x2:",round(xm2,2),round(sd2,2))) res<-readline("standardized variables: histogram:") print("x-axis range changes, the width of the histogram changes") # the break points of the histogram # I divide by 2 (smaller range) zb<-xb/2 z1<-(x1-xm1)/sd1 z2<-(x2-xm2)/sd2 h1<-hist(z1,breaks=zb,plot=FALSE) h2<-hist(z2,breaks=zb,plot=FALSE) plot(c(-8,8),c(0,0.5),typ='n',main='standardized data',xlab='anomaly [C]',ylab='density') lines(h1$mid,h1$density,typ='l',col=1) lines(h2$mid,h2$density,typ='l',col=4) res<-readline("standardized variables: Comparison with the Normal-Distribtuion") # function rnorm() generates a random sample # of data drawn from a Gaussian Bell-shaped Normal # distribution n<-length(x1) z0<-rnorm(n) h0<-hist(z0,breaks=zb,plot=FALSE) lines(h0$mid,h0$density,typ='h',col=3,lwd=1) xm0<-mean(z0) sd0<-sd(z0) print("mean and standard deviation of random sample drawn") print (paste("from Normal distribution :",round(xm0,4),round(sd0,4))) res<-readline("normal distribution theoretical probability density function:") source("scripts/myfunctions.R") p0<-fgauss(h0$mid) lines(h0$mid,p0,typ='l',col=3,lwd=3)