################################################### # 2014-02-03 Exploratory Data Analysis # # Example: Albany Airport data from # Global Historical Climatology Network -Daily # # this station has the international ID: USW00014735 # Daily data were retrieved from # http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/ # For this course I preprocessed the data # into monthly mean data # --------------------------------------- # Data table description: # column 1: year # column 2: month # column 3: days in month # column 4: index counter # column 5: monthly mean of the variable # column 6: number of observations # column 7: number of missing observations # column 8: ratio number of observations # / days in month # If too few observations were made # I set the monthly mean to 'NA' # NA is a special value in R indicating # no value. ################################################### ################################################### # tmax, tmin tavg are the 2m air temperature data # (daily max, min and mean) # Units: Degree Celsius # prcp: is daily precipitation amount in mm # snow is in mm ################################################### tmax<-read.table("data/USW00014735_tmax_mon_mean.asc") tmin<-read.table("data/USW00014735_tmin_mon_mean.asc") tavg<-read.table("data/USW00014735_tavg_mon_mean.asc") prcp<-read.table("data/USW00014735_prcp_mon_mean.asc") snow<-read.table("data/USW00014735_snow_mon_mean.asc") ################################################### # here I call another script with source code # This way I can make use of code without re-typing # it in every program. # In this case I want functions for Fahrenheit # to Celsius conversion ################################################### source("scripts/myfunctions.R") ################################################### # Plotting the time series from 1900 to 2013 ################################################### # Creating a time coordinate inum<-length(tavg$V1) yr1<-tavg$V1[1] dt<-1.0/12 ihelp<-seq(1,inum) time<-yr1+(ihelp-1)*dt # print ("We calculate the mean seasonal cycle climatology") print ("Using the 1981-2010 climate normal period (NOAA definition)") isclim<-(tavg$V1>=1981 & tavg$V1<=2010) # isclim is a vector with TRUE and FALSE values # take a look at isclim and thelp # see how we can select parts of the vector elements # based on TRUE FALSE values in a vector of the # same length # thelp gets the time data values for 1981-2010 thelp<-time[isclim] # mhelp gets the month data mhelp<-tavg$V2[isclim] # NOW WE DEFINE A 2-dimensional array to # store all variables we want for # the climatology calculations # buffer gets the temperature data inum2<-length(thelp) buffer<-array(0,dim=c(inum2,6)) buffer[,1]<-c2f(tavg$V5[isclim]) buffer[,2]<-c2f(tmin$V5[isclim]) buffer[,3]<-c2f(tmax$V5[isclim]) buffer[,4]<-prcp$V5[isclim] buffer[,5]<-tavg$V2[isclim] #buffer[,5]<-snow$V5[isclim]) # we define new vectors # that will store the monthly climatology clim<-array(0,dim=c(12,5)) nsum<-array(0,dim=c(12,5)) for (k in 1:5){ for (i in 1:inum2) { m<-mhelp[i] if (is.na(buffer[i]) ) { # skip } else { # real value nsum[m,k]<-nsum[m,k]+1 clim[m,k]<-clim[m,k]+buffer[i,k] } } } # almost there: now from the sum to average # (I demand 25 years with data to calculate # a climatology ) for (k in 1:5){ for (m in 1:12) { if (nsum[m,k]>25) { clim[m,k]<-clim[m,k]/nsum[m,k] } else { clim[m,k]<-NA } } } plot(c(1,12),c(0,80),typ='n', xlab='month',ylab='tavg [F]', main='USW00014735 1981-2010 climatology') lines(1:12,clim[,1],typ='l',col=1) lines(1:12,clim[,2],typ='l',col=4) lines(1:12,clim[,3],typ='l',col=2) lines(c(1,12),c(32.0,32.0),typ='l',lty=2,col=1)