# Chapter 7, Wilks book. Simple regression example, page 235.
# Regression line for the Mauna Loa monthly CO2 using multiple linear
# regression against time.
import numpy as np
from numpy.linalg import inv
import matplotlib.pyplot as plt
import sys as ss

# Column 4 of the data file holds the monthly CO2 concentration;
# the predictor X is simply the month index 1..n.
A = np.loadtxt('../data/co2.dat')
Y = A[:, 4]
X = np.arange(1, np.size(Y) + 1)

# --- Model 1: single predictor (linear trend in time) -----------------
# Design matrix [1, X]; ordinary least squares via the normal equations:
# c = (X'X)^-1 X' y.
X1 = np.vstack([np.ones(np.size(X)), X]).T
c = inv(X1.T.dot(X1)).dot(X1.T).dot(Y[:, np.newaxis])
co2_hat = c[0] + c[1] * X

plt.figure(figsize=(15, 7))
plt.scatter(X[1:400], Y[1:400], 4)
plt.plot(X[1:400], co2_hat[1:400], '-r')
plt.xlabel('Time from 1950-March to 2015')
plt.ylabel('Co2')
plt.title('Linear regression using one predictor')
plt.show()

# Goodness of fit.
# For a perfect regression: MSE = 0, R2 = 1, SSR = SST.
# Mean square error: residual degrees of freedom are n - 2
# (one predictor plus the intercept).
MSE = sum((Y - co2_hat) ** 2) / (len(X) - 2)
residual = np.sqrt(MSE)
# Variation of the fitted values around the regression line (explained).
SSR = sum((co2_hat - np.mean(Y)) ** 2)
# Total variation of the predictand around its mean.
SST = sum((Y - np.mean(Y)) ** 2)
# Coefficient of determination.
R2 = SSR / SST
print('MSE', MSE, 'residual is ', residual, 'SSR', SSR, 'SST', SST, 'R2', R2)

# The figure resembles Wilks Fig. 7.6d, so we increase the number of
# predictors: add a quadratic term in time.
X11 = np.vstack([np.ones(np.size(X)), X, X ** 2]).T
c = inv(X11.T.dot(X11)).dot(X11.T).dot(Y[:, np.newaxis])
co2_hat = c[0] + c[1] * X + c[2] * X ** 2

plt.figure(figsize=(15, 7))
plt.scatter(X[1:400], Y[1:400], 4)
plt.plot(X[1:400], co2_hat[1:400], '-r')
plt.xlabel('Time from 1950-March to 2015')
plt.ylabel('Co2')
plt.title('Linear regression using three predictors')
plt.show()

# Mean square error.
# BUG FIX: the original divided by (n - 2); with two predictors plus the
# intercept the residual degrees of freedom are n - 3.
MSE = sum((Y - co2_hat) ** 2) / (len(X) - 3)
residual = np.sqrt(MSE)
# BUG FIX: the original used "*2" (multiply by two) instead of "**2"
# (square), which turned SSR and SST into sums of deviations (~0 for SST's
# centred terms) rather than sums of squares, making R2 meaningless.
SSR = sum((co2_hat - np.mean(Y)) ** 2)
SST = sum((Y - np.mean(Y)) ** 2)
R2 = SSR / float(SST)
print('MSE', MSE, 'residual is ', residual, 'SSR', SSR, 'SST', SST, 'R2', R2)
# The last thing we need is to be able to simulate the sinusoidal
# (seasonal) variation — handled by the harmonic model that follows.
# --- Model 3: quadratic trend plus annual harmonic --------------------
# Add cosine/sine terms with a 12-month period so the fit can follow the
# seasonal (sinusoidal) cycle of CO2 on top of the quadratic trend.
X111 = np.vstack([np.ones(np.size(X)),
                  X,
                  X ** 2,
                  np.cos(2 * np.pi * X / 12),
                  np.sin(2 * np.pi * X / 12)]).T
b = inv(X111.T.dot(X111)).dot(X111.T).dot(Y[:, np.newaxis])
co2_hat = (b[0] + b[1] * X + b[2] * X ** 2
           + b[3] * np.cos(2 * np.pi * X / 12)
           + b[4] * np.sin(2 * np.pi * X / 12))

plt.figure(figsize=(15, 7))
plt.scatter(X[1:400], Y[1:400], 4)
plt.plot(X[1:400], co2_hat[1:400], '-r')
plt.xlabel('Time from 1950-March to 2015')
plt.ylabel('Co2')
plt.title('Linear regression using 4 predictors')
plt.show()

# Goodness of fit.
# BUG FIX: the original divided by (n - 2), the residual degrees of freedom
# for a single-predictor model; this model has 4 predictors plus the
# intercept, so the residual degrees of freedom are n - 5.
MSE = sum((Y - co2_hat) ** 2) / (len(X) - 5)
residual = np.sqrt(MSE)
SSR = sum((co2_hat - np.mean(Y)) ** 2)  # variation explained by the fit
SST = sum((Y - np.mean(Y)) ** 2)        # total variation of the predictand
R2 = SSR / SST
print('MSE', MSE, 'residual is ', residual, 'SSR', SSR, 'SST', SST, 'R2', R2)