###################################################################################
# Date: 2014-04-22
# Global mean temperature time series 
# Examples for t-test
# Author: Oliver Elison Timm
###################################################################################

### clean up memory
rm(list=ls())
par(mfrow=c(2,2))
### download the data first and store
### set usemonthly TRUE to work with monthly mean times series
### set FALSE to work with annual mean time series
usemonthly<-FALSE



##############################################################
# MAIN PART
##############################################################



useannual<-!usemonthly

if (usemonthly) {
  buffer<-read.table("data/hadcrut4mon.txt")
  # get column 2 : Monthly temperature anomalies
  x<-buffer[,2]
  n<-length(x)
  time<-(seq(1,n,1)-1)/12.0+1850
  title<-"global temperature anomalies (monthly mean)"  
} 
if (useannual) {
  buffer<-read.table("data/hadcrut4ann.txt")
  # get column 2 : Monthly temperature anomalies
  x<-buffer[,2]
  n<-length(x)
  time<-(seq(1,n,1)-1)+1850
  title<-"global temperature anomalies (annual mean)"
}


plot(time,x,typ='l',ylab='temp. anom. [deg C]')


wait<-readline("press enter to compare two time periods's mean")

i1<-(time>=1951 & time <1981)
i2<-(time>=1981 & time <2011)

x1<-x[i1]
t1<-time[i1]

x2<-x[i2]
t2<-time[i2]

# here we perform a test for the mean with two samples 
# both of with unknown sample variance
# the test statistics becomes approximately Gaussian
res<-t.test(x1,x2)
tvalue<-res$statistic
print(res)



# some more illustration
#
# add average line 
m1<-mean(x1)
m2<-mean(x2)

# difference
d<-m1-m2
lines(range(t1),c(m1,m1),col='blue',lwd=2)
lines(range(t2),c(m2,m2),col='red',lwd=2)



# box plots highlight the difference between the two periods

compare<-list(period1=x1,period2=x2)
boxplot(compare)


xbreaks<-seq(-1,1,0.2)
hist(x1,breaks=xbreaks,freq=FALSE,
     border='cyan',col='blue',
     ylim=c(0,2),xlab="temp ano. [deg C]",
     main='')
par(new=TRUE)
hist(x2,breaks=xbreaks,freq=FALSE,
     border='red',
     ylim=c(0,2),xlab='',ylab='',main='')


# what are the chances of having a difference 
# period two samples
# if we draw randomly from the entire time 
# we use sample to create a random
# sequence of integer numbers between 1 to n

nrep<-1000

# isample is of same size as our 30-yr periods
ilen1<-length(x1)
ilen2<-length(x2)
dr<-rep(NA,nrep)
dt<-rep(NA,nrep)
for ( i in 1:nrep) {
  isample<-sample(n)
  i1<-1
  i2<-ilen1
  i3<-ilen1+1
  i4<-ilen1+ilen2
  xr1<-x[isample[i1:i2]]
  xr2<-x[isample[i3:i4]]
  dr[i]<-mean(xr1)-mean(xr2)
  # perform a t-test and store the test statistic value
  res<-t.test(xr1,xr2)
  dt[i]<-res$statistic
}
dbreaks<-seq(-0.5,0.5,0.02)
hist(dr,breaks=dbreaks,
     border='black',freq=FALSE,
     ,xlab='random differences [deg C]'
     ,ylab='',main='random time-mean differences')
# mark our difference as a green point
points(d,0,col=3,cex=1,pch=19)
text(d,1,"obs.", adj=0,col=3)


wait<-readline("press enter to see the histogram for the t-test statistic")

tbreaks<-seq(-5,5,0.1)
par(mfrow=c(1,1))
hist(dt,breaks=tbreaks,
     border='black',freq=FALSE,
     ,xlab='test variable t'
     ,ylab='',main='random time-mean differences')
# mark our difference as a green point
points(tvalue,0,col=3,cex=1,pch=19)
text(tvalue,1,"obs.", adj=0,col=3)
print("if you don't see the t-value for the two time periods")
print("it is out of the range shown on the x-axis!")
print(paste("t-value: ",round(tvalue,4)))