#########################################
# 2014-01-29 Exploratory Data Analysis
#
# Example: Albany Airport daily weather
# reports from December 2013
# KALB_201312.csv
# (retrieved from
# http://www.erh.noaa.gov/aly/Climate/Albany/ClimateALB.htm)
#
# The script reads the daily weather table, plots the daily
# maximum and minimum temperatures, computes the monthly mean
# temperature, saves the figure as a PDF and finally estimates
# the probability of a freezing day (daily maximum <= 32F).
#########################################

# skip = 2: ignore the first two lines of the file; the third
# line then supplies the column names (see INFO at the end)
dec <- read.csv("data/KALB_201312.csv", skip = 2)
print(names(dec))

# we plot the temperature daily maximum
# on the x-axis the day in the month
# on the y-axis the daily maximum temperature
plot(dec$DY, dec$MAX, main = "Albany temperature")

# when plot() has been called we can add
# more into the graph, like a connecting line
lines(dec$DY, dec$MAX, col = "red")
res <- readline("hit enter to continue")

# we add the minimum temperatures
lines(dec$DY, dec$MIN, col = "blue")

# We can do better but we need to adjust the
# y-axis range
# first an empty graph with argument type = "n"
# (the argument name is spelled out in full instead of
# relying on partial matching of "typ")
plot(c(1, 31), c(-20, 70), type = "n",
     main = "Albany temperature",
     xlab = "day", ylab = "temperature [F]")
lines(dec$DY, dec$MAX, col = "red")
lines(dec$DY, dec$MIN, col = "blue")
res <- readline("hit enter to continue")

print("What was the average December 2013 temperature?")

# first it is common to obtain daily mean
# temperatures from tmax and tmin as
tday <- (dec$MAX + dec$MIN) / 2
lines(dec$DY, tday, col = "black")

# the average Dec daily mean temperature is
tmean <- mean(tday)
print(paste("The average December 2013 Temperature is ", tmean))

# NOTE: the computer returns long decimals in the result
# We will learn soon that it is reasonable to
# round the result to one or two digits.
tmean <- round(tmean, 1)
print(paste("The average December 2013 Temperature is ", tmean))

# horizontal line marking the monthly mean temperature
lines(c(1, 31), c(tmean, tmean), col = "black", lwd = 2)

# dev.copy2pdf() stops with an error when the target directory
# is missing, so we create it first if necessary
if (!dir.exists("figures")) {
  dir.create("figures")
}
dev.copy2pdf(file = "figures/KALB_temp_201312.pdf")
res <- readline("hit enter to continue")

###################################################
# now back to our probability and events
# How many freezing days were in December?
# We use daily max temperatures
###################################################

# R allows for a number of operations to compare
# data values (x==1 x<1 x>1)
# the result is TRUE or FALSE, which R internally
# also treats as the numbers 1 or 0

# I use this exercise to introduce the two most
# important concepts in writing computer programs
# loops and if-else statements

# a new set of brackets (curly braces) is used here to
# enclose a block of commands that are to be executed in
# loops, if-else-statements
# NOTE:
# text lines within the loop are (and should always be)
# indented.
# So is then the if block, which makes the code
# easier to read and understand.

# the sample size is the number of days (rows) in the table;
# we set it explicitly instead of reusing whatever value the
# loop variable happens to hold after the loop has finished
n <- nrow(dec)
count <- 0
for (day in seq_len(n)) {
  if (dec$MAX[day] <= 32.0) {
    count <- count + 1
    print(paste(count, ": Dec. ", day, " was below 32F"))
  } else {
    # empty block, nothing to do in this case
  }
}

print(paste("Frequentist's approach to probability estimation of freezing events"))
print(paste("sample size n=", n, "freezing events: ", count))

# relative frequency of freezing days and of its complement
P1 <- round(count / n, 2)
P2 <- 1 - P1
print(paste("estimated probability for freezing events :", P1))
print(paste("The probability of the complement event:", P2))

###################################################
# INFO:
# the function read.csv()
# is good for reading data tables where
# columns are delimited with commas
# EXCEL spreadsheets can be exported to csv
# format.
# the argument in the function call "skip=2"
# tells the function to ignore the
# first two lines while reading the data table
# print(names(dec)) prints the names of the data columns
# The third line in KALB_201312.csv contains
# useful names for the data columns
# which we can use in R
# (please take a look at the file KALB_201312.csv
# how it looks)