################################################################################### # Date: 2014-04-22 # Global mean temperature time series # Examples for t-test # Author: Oliver Elison Timm ################################################################################### ### clean up memory rm(list=ls()) par(mfrow=c(2,2)) ### download the data first and store ### set usemonthly TRUE to work with monthly mean times series ### set FALSE to work with annual mean time series usemonthly<-FALSE ############################################################## # MAIN PART ############################################################## useannual<-!usemonthly if (usemonthly) { buffer<-read.table("data/hadcrut4mon.txt") # get column 2 : Monthly temperature anomalies x<-buffer[,2] n<-length(x) time<-(seq(1,n,1)-1)/12.0+1850 title<-"global temperature anomalies (monthly mean)" } if (useannual) { buffer<-read.table("data/hadcrut4ann.txt") # get column 2 : Monthly temperature anomalies x<-buffer[,2] n<-length(x) time<-(seq(1,n,1)-1)+1850 title<-"global temperature anomalies (annual mean)" } plot(time,x,typ='l',ylab='temp. anom. [deg C]') wait<-readline("press enter to compare two time periods's mean") i1<-(time>=1951 & time <1981) i2<-(time>=1981 & time <2011) x1<-x[i1] t1<-time[i1] x2<-x[i2] t2<-time[i2] # here we perform a test for the mean with two samples # both of with unknown sample variance # the test statistics becomes approximately Gaussian res<-t.test(x1,x2) tvalue<-res$statistic print(res) # some more illustration # # add average line m1<-mean(x1) m2<-mean(x2) # difference d<-m1-m2 lines(range(t1),c(m1,m1),col='blue',lwd=2) lines(range(t2),c(m2,m2),col='red',lwd=2) # box plots highlight the difference between the two periods compare<-list(period1=x1,period2=x2) boxplot(compare) xbreaks<-seq(-1,1,0.2) hist(x1,breaks=xbreaks,freq=FALSE, border='cyan',col='blue', ylim=c(0,2),xlab="temp ano. [deg C]", main='') par(new=TRUE) hist(x2,breaks=xbreaks,freq=FALSE, border='red', ylim=c(0,2),xlab='',ylab='',main='') # what are the chances of having a difference # period two samples # if we draw randomly from the entire time # we use sample to create a random # sequence of integer numbers between 1 to n nrep<-1000 # isample is of same size as our 30-yr periods ilen1<-length(x1) ilen2<-length(x2) dr<-rep(NA,nrep) dt<-rep(NA,nrep) for ( i in 1:nrep) { isample<-sample(n) i1<-1 i2<-ilen1 i3<-ilen1+1 i4<-ilen1+ilen2 xr1<-x[isample[i1:i2]] xr2<-x[isample[i3:i4]] dr[i]<-mean(xr1)-mean(xr2) # perform a t-test and store the test statistic value res<-t.test(xr1,xr2) dt[i]<-res$statistic } dbreaks<-seq(-0.5,0.5,0.02) hist(dr,breaks=dbreaks, border='black',freq=FALSE, ,xlab='random differences [deg C]' ,ylab='',main='random time-mean differences') # mark our difference as a green point points(d,0,col=3,cex=1,pch=19) text(d,1,"obs.", adj=0,col=3) wait<-readline("press enter to see the histogram for the t-test statistic") tbreaks<-seq(-5,5,0.1) par(mfrow=c(1,1)) hist(dt,breaks=tbreaks, border='black',freq=FALSE, ,xlab='test variable t' ,ylab='',main='random time-mean differences') # mark our difference as a green point points(tvalue,0,col=3,cex=1,pch=19) text(tvalue,1,"obs.", adj=0,col=3) print("if you don't see the t-value for the two time periods") print("it is out of the range shown on the x-axis!") print(paste("t-value: ",round(tvalue,4)))