xxxxxxxxxx
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot
# funktionen summaryLM defineres
def summaryLM(lmUD:"ols_output"):
# lave summary af estimeret model i ols-output
pd.options.display.float_format = '{:,.4f}'.format
print('Estimated Coefficients:')
print(lmUD.summary2().tables[1])
print(' ')
print('Number of observations:','{:.0f}'.format(lmUD.nobs),
' Error degrees of freedom:','{:.0f}'.format(lmUD.df_resid))
print('Root Mean Squared Error:',format(np.sqrt(lmUD.mse_resid),'.4g'))
print('R-squared:',format(lmUD.rsquared,'.3g'),' Adjusted R-Squared:',
format(lmUD.rsquared_adj,'.3g'))
print('F-statistic vs. constant model:',format(lmUD.fvalue,'.1f'),
' p-value =',format(lmUD.f_pvalue,'.3g'))
# data indskrives
ess0=np.array([0.04,0.18,0.26,0.28,0.29,0.30,0.32,0.37,0.38,0.40,
0.42,0.43,0.44,0.51,0.56,0.56,0.57,0.58,0.60,0.63,0.64,0.65,0.71,
0.75,0.82,0.88,0.95,1.11])
A0=np.array([0.01,0.05,0.07,0.10,0.09,0.21,0.11,0.21,0.12,0.26,
0.32,0.26,0.34,0.32,0.35,0.16,0.20,0.22,0.28,0.34,0.48,0.47,
0.29,0.44,0.61,0.65,0.76,0.71])
B0=np.array([0.00,0.00,0.20,0.41,0.03,0.00,0.09,0.23,0.41,0.00,0.00,
0.57,0.23,0.41,0.37,0.32,0.47,0.45,0.45,0.77,0.38,0.23,0.67,0.54,
0.37,0.77,0.71,0.61])
# datatabel dannes
# (hvis data indlæses med read_csv har man allerede en datatabel)
dataoploes=pd.DataFrame({'ess':ess0,'A':A0,'B':B0,'AA':A0*A0,'BB':B0*B0,'AB':A0*B0})
print("Opstart gennemført: ess, A, B, AA, BB og AB er indskrevet i tabel dataoploes")
xxxxxxxxxx
# Opstart ovenfor skal være kørt. Data er i datatabellen dataoploes
# multipel regressionsmodel analyseres
lmUD=ols(data=dataoploes,formula='ess~A+B+AA+BB+AB').fit()
summaryLM(lmUD)
xxxxxxxxxx
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot
def refline(haeldning:"float",skaering:"float",
linestyle:"linestyle"='-',color:"color"='b',
ax:"plotwindow(default_plt)"=plt):
# indtegne linje i figur
if (ax==plt):
axx=plt.gca()
else:
axx=ax
x_endePkt=axx.get_xlim()
x_midt=(x_endePkt[1]+x_endePkt[0])/2
y_midt=skaering+haeldning*x_midt
axx.axline([x_midt,y_midt],slope=haeldning,linestyle=linestyle,color=color)
# data indskrives
ess0=np.array([0.04,0.18,0.26,0.28,0.29,0.30,0.32,0.37,0.38,0.40,
0.42,0.43,0.44,0.51,0.56,0.56,0.57,0.58,0.60,0.63,0.64,0.65,0.71,
0.75,0.82,0.88,0.95,1.11])
A0=np.array([0.01,0.05,0.07,0.10,0.09,0.21,0.11,0.21,0.12,0.26,
0.32,0.26,0.34,0.32,0.35,0.16,0.20,0.22,0.28,0.34,0.48,0.47,
0.29,0.44,0.61,0.65,0.76,0.71])
B0=np.array([0.00,0.00,0.20,0.41,0.03,0.00,0.09,0.23,0.41,0.00,0.00,
0.57,0.23,0.41,0.37,0.32,0.47,0.45,0.45,0.77,0.38,0.23,0.67,0.54,
0.37,0.77,0.71,0.61])
# datatabel dannes
# (hvis data indlæses med read_csv har man allerede en datatabel)
dataoploes=pd.DataFrame({'ess':ess0,'A':A0,'B':B0,'AA':A0*A0,'BB':B0*B0,'AB':A0*B0})
print("Opstart gennemført: ess, A, B, AA, BB og AB er indskrevet i tabel dataoploes")
xxxxxxxxxx
# Opstart ovenfor skal være kørt. Data er i datatabellen dataoploes
# model analyseres
lmUD=ols(data=dataoploes,formula='ess~A+B').fit()
# residualer dannes og fire figurer dannes
r=lmUD.resid
ax1 = plt.subplot(2, 2, 1)
ax2 = plt.subplot(2, 2, 2)
ax3 = plt.subplot(2, 2, 3)
ax4 = plt.subplot(2, 2, 4)
# residualplots
ax1.plot(dataoploes.A,r,'o')
ax1.set_xlabel('Acidity')
ax1.set_ylabel('Residualer')
ax1.axhline(0)
ax2.plot(dataoploes.B,r,'o')
ax2.set_xlabel('Basicity')
ax2.set_ylabel('Residualer')
ax2.axhline(0)
# qqplot af residualer
qqplot(r,line='q',ax=ax3)
ax3.set_title('Residualer')
# målte mod forventede værdier
ax4.plot(dataoploes.ess-r,dataoploes.ess,'o')
ax4.set_xlabel('Forventede')
ax4.set_ylabel('ESS')
refline(1,0,ax=ax4)
plt.tight_layout()
plt.show()
9.2.3 Konfidensinterval og prædiktionsinterval
xxxxxxxxxx
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot
# data indskrives
ess0=np.array([0.04,0.18,0.26,0.28,0.29,0.30,0.32,0.37,0.38,0.40,
0.42,0.43,0.44,0.51,0.56,0.56,0.57,0.58,0.60,0.63,0.64,0.65,0.71,
0.75,0.82,0.88,0.95,1.11])
A0=np.array([0.01,0.05,0.07,0.10,0.09,0.21,0.11,0.21,0.12,0.26,
0.32,0.26,0.34,0.32,0.35,0.16,0.20,0.22,0.28,0.34,0.48,0.47,
0.29,0.44,0.61,0.65,0.76,0.71])
B0=np.array([0.00,0.00,0.20,0.41,0.03,0.00,0.09,0.23,0.41,0.00,0.00,
0.57,0.23,0.41,0.37,0.32,0.47,0.45,0.45,0.77,0.38,0.23,0.67,0.54,
0.37,0.77,0.71,0.61])
# datatabel dannes
# (hvis data indlæses med read_csv har man allerede en datatabel)
dataoploes=pd.DataFrame({'ess':ess0,'A':A0,'B':B0,'AA':A0*A0,'BB':B0*B0,'AB':A0*B0})
print("Opstart gennemført: ess, A, B, AA, BB og AB er indskrevet i tabel dataoploes")
xxxxxxxxxx
# Opstart ovenfor skal være kørt. Data er i datatabellen dataoploes
# model analyseres
lmUD=ols(data=dataoploes,formula='ess~A+B').fit()
# konfidensinterval for middelværdi beregnes og udskrives
nyData=pd.DataFrame({'A':np.array([0.3,0.7]),'B':np.array([0.4,0.6])})
predUD=lmUD.get_prediction(nyData)
print(pd.DataFrame({'Skøn':predUD.predicted_mean,
'Lower':predUD.conf_int(obs=False)[:,0],
'Upper':predUD.conf_int(obs=False)[:,1]}))