# chapter 7 Wilks book # this example show the forward selection method # in this methode we choose the predictor with the least MSE # and so on import numpy as np from numpy.linalg import inv import matplotlib.pyplot as plt import statsmodels.api as sm import sys as ss A = np.loadtxt('../data/Ithaca.dat'); B = np.loadtxt('../data/Canandaig.dat'); # X == max ithaca temperature # X == min Ithaca temperature # X == max canadiag temperature X_minIthaca=A[:,3]; X_maxIthaca=A[:,2]; X_maxCand=B[:,2]; X_date=A[:,0]; X_preIthaca=np.log10(A[:,1]+0.01); X_preCand=np.log10(B[:,1]+0.01); #Y == min cand temperature Y=B[:,3]; Y=Y[:,np.newaxis] ########################## # the first iterative X=np.vstack([X_date,X_maxIthaca, X_minIthaca , X_preIthaca ,X_maxCand ,X_preCand]).T; for i in np.arange(0,np.size(X,1)): print(i) X1=np.vstack([np.ones(np.size(X_maxCand)) , X[:,i]]).T; b=inv(X1.T.dot(X1)).dot(X1.T).dot(Y); T_hat_canda=b[0]+b[1]*X[:,i]; T_hat_canda=T_hat_canda[:,np.newaxis] plt.figure;plt.scatter(X[:,i],Y);plt.plot(X[:,i],T_hat_canda,'-r') plt.xlabel('Ithaca temperature') plt.ylabel('Canadiag temperature') plt.show() # Goodness of fitness. # (1) Mean Square Error. MSE=np.sum((Y-T_hat_canda)**2)/(np.size(X_maxCand)-2); se=np.sqrt(MSE) # (2) Coefficient of determintation. SSR=np.sum((T_hat_canda-np.mean(Y))**2); SST=np.sum((Y-np.mean(Y))**2); R2=SSR/float(SST); print('SE',se,'R2',R2) ############### # clearvars X X1 b MSE SSR SST R2 T_hat_canda X=np.vstack([X_date,X_maxIthaca , X_preIthaca ,X_maxCand ,X_preCand]).T; for i in np.arange(0,np.size(X,1)): X1=np.vstack([np.ones(np.size(X_maxCand)), X_minIthaca, X[:,i]]).T; b=inv(X1.T.dot(X1)).dot(X1.T).dot(Y); b=np.squeeze(b) T_hat_canda=b[0]+b[1]*X_minIthaca+b[2]*X[:,i]; print(T_hat_canda.shape) plt.figure;plt.scatter(X[:,i],Y);plt.scatter(X[:,i],T_hat_canda,c='r') plt.xlabel('Ithaca temperature') plt.ylabel('Canadiag temperature') plt.show() # Goodness of fitness. # (1) Mean Square Error. MSE=np.sum((Y-T_hat_canda)**2)/(np.size(X_maxCand)-2); # (2) Coefficient of determintation. SSR=np.sum((T_hat_canda-np.mean(Y))**2); SST=np.sum((Y-np.mean(Y))**2); R2=SSR/float(SST); print('MSE= %f and R2= %f ' % (MSE , R2*100))