# 2014-04-22: # Script to illustrate the statistical # hypothesis testing # # Motivated by the # example in the PBS NOVA documentary # where the researchers study how birds # use the environment to navigate and # avoid collisions with objects # In that experiment: # a starling flies through a corridor # with side walls painted in a # horizontally striped pattern # ten times. # Then the experiment is repeated # with the walls being transformed # into vertical stripe pattern # In both cases the bird flies # the same distance repeatedly # (10 times in each experiment) # the difference in the average # speed of the bird flying through # the corridor is considered statistically # significant # # We only know the average from speed # from both experiments. # We assume: all experiments are in each group are # identical and independent. # sample size for the two experiments n<-c(10,10) # mean of the experiments # ft/s m<-c(15.3, 16.5) #m<-c(16.0,16.5) # we don't know the standard deviation yet, so we # experiment with a number of different values # and see how the statistical test would turn out # sdev<-c(1,1) # Let's assume gaussian random fluctuations around the means exp1<-rnorm(n,m=m[1],sd=sdev[1]) exp2<-rnorm(n,m=m[2],sd=sdev[2]) # we create a test statistic that has a # known theoretical probability density function # under the null hypothesis H0 z<-(m[1]-m[2])/sqrt(sdev[1]^2/n[1]+sdev[2]^2/n[2]) print("Mean values: ") print(round(m,4)) print ("Sample size: ") print(n) print("Standard deviations (simulated scenario): ") print(round(sdev,4)) print(paste("Differences in the means:",round(m[1]-m[2],4))) print("Test statistic for the differences in the mean") print("(Follows a Standardized Gaussian Distribution)") print(round(z,4)) wait<-readline("press enter to go see the CDF") # let's see where this test statistical value is loacted: # the tails of the distribtuion, it will indicate # that it is unlikely to be conform with the null hypothesis # we use the built-in function dnorm(x) p<-pnorm(z) print(paste("Probability of obtaining a value z<= ",round(z,4))) print(round(p,4)) # create a gaussian probability density function plot zval<-seq(-4,4,0.1) cdf<-pnorm(zval) par(mfrow=c(1,1)) plot(zval,cdf,xlab='test score z',ylab='Cumulative Density Function',typ='l') lines(c(z,z),c(0,1),typ='l',col='red') text(z+.2,1,"our test value z",adj=0,col='red') wait<-readline("press enter to go see the PDF") # create a gaussian probability density function plot zval<-seq(-4,4,0.1) pdf<-dnorm(zval) par(mfrow=c(1,1)) plot(zval,pdf,xlab='test score z',ylab='Probability Density Function',typ='l') lines(c(z,z),c(0,1),typ='l',col='red') text(z+.2,1,"our test value z",adj=0,col='red') text(z+0.2,0.01,"z",adj=0,col='red')