################################################### # 2014-02-03 Exploratory Data Analysis # # Example: Albany Airport data from # Global Historical Climatology Network -Daily # # this station has the international ID: USW00014735 # Daily data were retrieved from # http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/ # For this course I preprocessed the data # into monthly mean data # --------------------------------------- # Data table description: # column 1: year # column 2: month # column 3: days in month # column 4: index counter # column 5: monthly mean of the variable # column 6: number of observations # column 7: number of missing observations # column 8: ratio number of observations # / days in month # If too few observations were made # I set the monthly mean to 'NA' # NA is a special value in R indicating # no value. ################################################### source("scripts/myfunctions.R") ################################################### # snow is in mm (water equivalent) ################################################### snow<-read.table("data/USW00014735_snow_mon_mean.asc") ################################################### # Plotting the time series from 1900 to 2013 ################################################### # Creating a time coordinate inum<-length(snow$V1) yr1<-snow$V1[1] dt<-1.0/12 ihelp<-seq(1,inum) time<-yr1+(ihelp-1)*dt # open a new x-y plot panel # temperature in Fahrenheit plot(c(1900,2013),c(0,110),typ='n', xlab='time',ylab='snow [mm]', main='Albany USW00014735') res<-snow$V5 lines(time,res,col=1) print ("We see a lot of hi-frequency variations ...") enter<-readline("press enter to continue") plot(c(1981,2010),c(0,110),typ='n', xlab='time',ylab='snow [mm]', main='Albany USW00014735') lines(time,res,col=1) print("The seasonal cycle is predominant signal") enter<-readline("press enter to continue") print ("We calculate now the mean seasonal cycle climatology") print ("Using the 1981-2010 climate normal period (NOAA definition)") isclim<-(snow$V1>=1981 & snow$V1<=2010) # isclim is a vector with TRUE and FALSE values # take a look at isclim and thelp # see how we can select parts of the vector elements # based on TRUE FALSE values in a vector of the # same length # thelp gets the time data values for 1981-2010 thelp<-time[isclim] # mhelp gets the month data mhelp<-snow$V2[isclim] buffer<-snow$V5[isclim] inum2<-length(thelp) # we define new vectors # that will store the monthly climatology snowclim<-array(0,dim=c(12)) nsum<-array(0,dim=c(12)) for (i in 1:inum2) { m<-mhelp[i] # now we must check at position i # buffer has a real number or NA # is.na() is a builtin funtion # that checks for NA and returns TRUE or FALSE if (is.na(buffer[i]) ) { # skip } else { # real value nsum[m]<-nsum[m]+1 snowclim[m]<-snowclim[m]+buffer[i] } } # almost there: now from the sum to average # (I demand 25 years with data to calculate # a climatology ) for (m in 1:12) { if (nsum[m]>25) { snowclim[m]<-snowclim[m]/nsum[m] } else { snowclim[m]<-NA } } plot(c(1,12),c(0,30),typ='n', xlab='month',ylab='snow [mm]', main='USW00014735 1981-2010 climatology') lines(1:12,snowclim,typ='h',lwd=5)