xxxxxxxxxx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
# Define the summaryLM function
def summaryLM(lmUD):
    """Print a compact text summary of a fitted linear model.

    Parameters
    ----------
    lmUD : fitted model result
        Object providing ``summary2()`` (whose ``tables[1]`` is the
        coefficient table) and the attributes ``nobs``, ``df_resid``,
        ``mse_resid``, ``rsquared``, ``rsquared_adj``, ``fvalue`` and
        ``f_pvalue``.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    print('Estimated Coefficients:')
    # Apply the 4-decimal float format only while the coefficient table is
    # printed, so the caller's global pandas display options are untouched.
    with pd.option_context('display.float_format', '{:,.4f}'.format):
        print(lmUD.summary2().tables[1])
    print(' ')
    print(f"Number of observations: {lmUD.nobs:.0f}"
          f"  Error degrees of freedom: {lmUD.df_resid:.0f}")
    # Root mean squared error is the square root of the residual mean square.
    print(f"Root Mean Squared Error: {np.sqrt(lmUD.mse_resid):.4g}")
    print(f"R-squared: {lmUD.rsquared:.3g}"
          f"  Adjusted R-Squared: {lmUD.rsquared_adj:.3g}")
    print(f"F-statistic vs. constant model: {lmUD.fvalue:.1f}"
          f"  p-value = {lmUD.f_pvalue:.3g}")
def refline(haeldning, skaering, linestyle='-', color='b', ax=plt):
    """Draw the line ``y = skaering + haeldning * x`` on a plot.

    Parameters
    ----------
    haeldning : float
        Slope of the line.
    skaering : float
        Intercept of the line (value of y at x = 0).
    linestyle, color :
        Passed unchanged to ``axline``.
    ax :
        Either the ``matplotlib.pyplot`` module (the default), meaning
        "use the current axes", or an object providing ``get_xlim`` and
        ``axline`` to draw on.
    """
    # The pyplot module is used as a sentinel, so compare by identity.
    axx = plt.gca() if ax is plt else ax
    # Anchor the line at the midpoint of the current x-limits.
    x_lo, x_hi = axx.get_xlim()
    x_midt = (x_hi + x_lo) / 2
    y_midt = skaering + haeldning * x_midt
    axx.axline([x_midt, y_midt], slope=haeldning,
               linestyle=linestyle, color=color)
# Enter the data: 24 values for afstand0 and 24 values for hast0
afstand0 = np.array([
    0.032, 0.034, 0.214, 0.263, 0.275, 0.275,
    0.450, 0.500, 0.500, 0.630, 0.800, 0.900,
    0.900, 0.900, 0.900, 1.000, 1.100, 1.100,
    1.400, 1.700, 2.000, 2.000, 2.000, 2.000,
])
hast0 = np.array([
    170, 290, -130, -70, -185, -220,
    200, 290, 270, 200, 300, -30,
    650, 150, 500, 920, 450, 500,
    500, 960, 500, 850, 800, 1090,
])
print("Opstart er gennemført: afstand0 og hast0 er indskrevet")
xxxxxxxxxx
# The setup code above must have been run first
# (it defines summaryLM, refline, afstand0 and hast0).
# Build the data table
# (if the data are loaded with read_csv, a data table already exists).
dataHubble = pd.DataFrame({'afstand': afstand0, 'hast': hast0})
# Fit the model hast ~ afstand (intercept plus slope).
lmUD = ols(data=dataHubble, formula='hast~afstand').fit()
# Figure with the regression line drawn in.
plt.plot(dataHubble.afstand, dataHubble.hast, 'o')
plt.xlabel('Afstand')
plt.ylabel('Hastighed')
par = lmUD.params
# params is a label-indexed Series; use .iloc for positional access, since
# integer keys with [] on such a Series are deprecated in recent pandas.
# Position 0 is passed as the intercept and position 1 as the slope.
refline(par.iloc[1], par.iloc[0])
plt.show()
summaryLM(lmUD)
7.6.1 Analyse af regressionsmodel med kendt skæring
xxxxxxxxxx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
# Define the summaryLM function
def summaryLM(lmUD):
    """Print a compact text summary of a fitted linear model.

    Parameters
    ----------
    lmUD : fitted model result
        Object providing ``summary2()`` (whose ``tables[1]`` is the
        coefficient table) and the attributes ``nobs``, ``df_resid``,
        ``mse_resid``, ``rsquared``, ``rsquared_adj``, ``fvalue`` and
        ``f_pvalue``.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    print('Estimated Coefficients:')
    # Apply the 4-decimal float format only while the coefficient table is
    # printed, so the caller's global pandas display options are untouched.
    with pd.option_context('display.float_format', '{:,.4f}'.format):
        print(lmUD.summary2().tables[1])
    print(' ')
    print(f"Number of observations: {lmUD.nobs:.0f}"
          f"  Error degrees of freedom: {lmUD.df_resid:.0f}")
    # Root mean squared error is the square root of the residual mean square.
    print(f"Root Mean Squared Error: {np.sqrt(lmUD.mse_resid):.4g}")
    print(f"R-squared: {lmUD.rsquared:.3g}"
          f"  Adjusted R-Squared: {lmUD.rsquared_adj:.3g}")
    print(f"F-statistic vs. constant model: {lmUD.fvalue:.1f}"
          f"  p-value = {lmUD.f_pvalue:.3g}")
def refline(haeldning, skaering, linestyle='-', color='b', ax=plt):
    """Draw the line ``y = skaering + haeldning * x`` on a plot.

    Parameters
    ----------
    haeldning : float
        Slope of the line.
    skaering : float
        Intercept of the line (value of y at x = 0).
    linestyle, color :
        Passed unchanged to ``axline``.
    ax :
        Either the ``matplotlib.pyplot`` module (the default), meaning
        "use the current axes", or an object providing ``get_xlim`` and
        ``axline`` to draw on.
    """
    # The pyplot module is used as a sentinel, so compare by identity.
    axx = plt.gca() if ax is plt else ax
    # Anchor the line at the midpoint of the current x-limits.
    x_lo, x_hi = axx.get_xlim()
    x_midt = (x_hi + x_lo) / 2
    y_midt = skaering + haeldning * x_midt
    axx.axline([x_midt, y_midt], slope=haeldning,
               linestyle=linestyle, color=color)
# Enter the data: 24 values for afstand0 and 24 values for hast0
afstand0 = np.array([
    0.032, 0.034, 0.214, 0.263, 0.275, 0.275,
    0.450, 0.500, 0.500, 0.630, 0.800, 0.900,
    0.900, 0.900, 0.900, 1.000, 1.100, 1.100,
    1.400, 1.700, 2.000, 2.000, 2.000, 2.000,
])
hast0 = np.array([
    170, 290, -130, -70, -185, -220,
    200, 290, 270, 200, 300, -30,
    650, 150, 500, 920, 450, 500,
    500, 960, 500, 850, 800, 1090,
])
print("Opstart er gennemført: afstand0 og hast0 er indskrevet")
xxxxxxxxxx
# The setup code above must have been run first
# (it defines summaryLM, refline, afstand0 and hast0).
# Build the data table
# (if the data are loaded with read_csv, a data table already exists).
dataHubble = pd.DataFrame({'afstand': afstand0, 'hast': hast0})
# Fit two models: one with an intercept, and one where the "-1" in the
# formula removes the intercept term.
lmUD = ols(data=dataHubble, formula='hast~afstand').fit()
lmUD1 = ols(data=dataHubble, formula='hast~afstand-1').fit()
summaryLM(lmUD1)
# Figure with both fitted lines.
plt.plot(dataHubble.afstand, dataHubble.hast, 'o')
plt.xlabel('Afstand')
plt.ylabel('Hastighed')
# params is a label-indexed Series; use .iloc for positional access, since
# integer keys with [] on such a Series are deprecated in recent pandas.
par = lmUD.params
# Model with intercept: position 0 is passed as intercept, position 1 as slope.
refline(par.iloc[1], par.iloc[0])
par1 = lmUD1.params
# Model without intercept: the single parameter is the slope; the line is
# drawn in red with intercept 0.
refline(par1.iloc[0], 0, color='r')
plt.show()