# chapter 7 Wilks book # Simple regression example page 225 and 230. # regressing candadiag temperature on Ithaca min temperature. import numpy as np from numpy.linalg import inv import matplotlib.pyplot as plt import sys as ss A=np.loadtxt('../data/Ithaca.dat') B=np.loadtxt('../data/Canandaig.dat') # X == min Ithaca temperature # Y == min Candadiag temperature X=A[:,3] Y=B[:,3] X1=np.vstack([np.ones(np.size(X)) , X]).T; C=inv(X1.T.dot(X1)).dot(X1.T).dot(Y[:,np.newaxis]) T_hat_canda=C[0]+C[1]*X; plt.scatter(X,Y);plt.plot(X,T_hat_canda,'-r') plt.xlabel('Ithaca temperature') plt.ylabel('Canadiag temperature') plt.title('Regression example') plt.show() # Goodness of fitness. # For perfect regression # MSE = 0, R2=1 , SST=SST # Mean Square Error. MSE=sum((Y-T_hat_canda)**2)/(len(X)-2) residual=np.sqrt(MSE) # vairation around the regression line SSR=sum((T_hat_canda-np.mean(Y))**2) # variation around the predictand SST=sum((Y-np.mean(Y))**2) # (2) Coefficient of determintation. R2=SSR/SST # examble 7.2 plt.figure() plt.scatter(np.arange(1,len(Y)+1),Y-T_hat_canda);plt.plot(np.arange(1,len(Y)+1),np.zeros(len(Y))) plt.xlabel('Date, January 1987') plt.ylabel('Residual, F') plt.show()