# chapter 7 Wilks book # this example show the result of the overfitting import numpy as np from numpy.linalg import inv import matplotlib.pyplot as plt import statsmodels.api as sm import sys as ss A = np.loadtxt('../data/overfillting_winter.txt'); # X == year, deficit, personal, sheep, STA # Y == snow X_year=A[:,0]; X_deficit=A[:,2]; X_personal=A[:,3]; X_sheep=A[:,4]; X_SAT=A[:,5]; Y_snow=A[:,1] plt.figure;plt.scatter(X_year,Y_snow); plt.xlabel('Years ') plt.ylabel('nu. of tornados') plt.show() ## linear regression X1=np.vstack([np.ones(np.size(X_year[0:6])), \ X_year[0:6] , \ X_deficit[0:6] , \ X_personal[0:6] , \ X_sheep[0:6] , \ X_SAT[0:6]]).T; b=inv(X1.T.dot(X1)).dot(X1.T).dot(Y_snow[0:6,np.newaxis]) Y_snow_hat=b[0]+b[1]*X_year+b[2]*X_deficit+b[3]*X_personal+b[4]*X_sheep+b[5]*X_SAT; plt.figure;plt.scatter(X_year,Y_snow);plt.plot(X_year,Y_snow_hat,'-r') plt.xlabel('Years') plt.ylabel('nu. of tornados') plt.show() # Goodness of fitness. # (1) Mean Square Error. MSE=np.sum((Y_snow[0:6]-Y_snow_hat[0:6])**2)/(np.size(X_SAT[0:6])-2) # (2) Coefficient of determintation. SSR=np.sum((Y_snow_hat[0:6]-np.mean(Y_snow_hat[0:6]))**2) SST=np.sum((Y_snow[0:6]-np.mean(Y_snow[0:6]))**2) R2=SSR/SST print('MSE',MSE,'R2',R2)