# Scatter, covariance and correlation: a simulated linear-regression example.
#
# The script
#   1. draws the "true" line y = b * x + a on a regular grid of x values,
#   2. adds Gaussian noise to y only (x is treated as error free),
#   3. estimates slope and intercept from the noisy sample using the
#      correlation coefficient and the two standard deviations,
#   4. overlays the estimated regression line and marks the sample centre.
#
# 2014-03-25 UPDATE: the slope and intercept variable names were changed to
# match the class14.ppt slides (please review your class notes for the
# meaning of a and b):
#   a = intercept, b = slope of the line y = b * x + a

# Sample grid ----
# seq(from, to, by): values from `from` to `to` in increments of `by`.
# The finer the increment, the more data points are generated.
x <- seq(-1, 10, 0.05)
n <- length(x)
print(paste("Note: the sample size will be: ", n))

# True model parameters ----
a <- -2  # intercept
b <- 1   # slope
y <- b * x + a

# Plot the noise-free line and hand-drawn coordinate axes ----
xrange <- range(x)
yrange <- range(y)
# yrange <- c(-4, 4)
# type = "n" opens an empty plot region that spans the data ranges.
plot(xrange, yrange, type = "n")
lines(x, y)
# Vertical axis through x = 0, with an arrow head and a label.
lines(c(0, 0), range(y))
text(0, max(y), "^")
text(-0.35, max(y), "y")
# Horizontal axis through y = 0, with an arrow head and a label.
lines(range(x), c(0, 0))
text(max(x), 0, ">")
text(max(x), -0.75, "x")

# Noisy observations ----
# Now we assume that the values of y are contaminated with random errors
# (noise), but only the y values (the dependent variable), not the x values.
# This comes from the history of using linear regression with controlled
# experimental data: one controls the variable x and is interested in the
# outcome y, yet the experiment cannot prevent other influencing factors
# (measurement errors, or environmental factors that vary during the
# experiment) from introducing errors in the outcome.

# Standard deviation of the Gaussian-distributed random errors.
se <- 1
print(paste("standard deviation of the gaussian random sample data ", se))
e <- rnorm(n = length(x), mean = 0, sd = se)
yobs <- b * x + a + e
points(x, yobs, col = 4, pch = "+")
print(paste("correlation yobs and x", round(cor(yobs, x), 2)))

# Estimate the regression line: slope bfit and intercept afit ----
r <- cor(x, yobs)
syobs <- sd(yobs)
sx <- sd(x)
bfit <- r * syobs / sx
afit <- mean(yobs) - bfit * mean(x)
# Round only for display. afit and bfit keep full precision so that the
# plotted line is the actual fitted line and passes through the sample
# centre (mean(x), mean(yobs)) that is marked below.
print(paste("estimated slope parameter b:", round(bfit, 2)))
print(paste("estimated intercept parameter a:", round(afit, 2)))
print(paste(
  "regression line model: yfit= ", round(afit, 2), "+", round(bfit, 2), "*x"
))

# Create and draw the regression line ----
yfit <- afit + bfit * x
lines(x, yfit, col = 3, lty = 2, lwd = 2)
# Mark the centre of the sample cloud.
points(mean(x), mean(yobs), pch = "X", cex = 1.5, col = 3)

# Exercise:
# - Change the correlation by increasing the variance of the errors (se).
# - Change the sample size and see how it affects the estimated regression
#   parameters and the fitted line.