Linear Regression 8 | Python for SLR & MLR

Series: Linear Regression

Linear Regression 8 | Python for SLR & MLR

  1. Import the Packages for Analysis
from math import *
import pandas as pd
import numpy as np
from scipy import stats
import scipy.linalg as linalg
from sklearn import linear_model
import statsmodels.api as sm
import statsmodels.formula.api as smf

2. SLR

# modeling
model = smf.ols('y ~ x', data=df).fit()
model.summary()
# prediction
xs = <list of xs to be predicted>
prediction=model.get_prediction(exog=dict(x=xs))
prediction.summary_frame(alpha=0.05)

3. MLR

# modeling
model = smf.ols('y ~ x1+x2+x3...', data=df).fit()
model.summary()
# forced dummy encoding
model= smf.ols('y ~ C(x)', data=df).fit()
model.summary()
# MLR Calculation
b = np.dot(np.dot(linalg.inv(np.dot(X.T, X)), X.T), Y)   # OLSE
e = Y - np.dot(X, b) # Residual
H = np.dot(np.dot(X, linalg.inv(np.dot(X.T, X))), X.T) # Hat Matrix
In = np.identity(n)
Jn = np.ones(n**2).reshape(n,n)
SSE = np.dot(np.dot(Y.T, (np.identity(n) - H)), Y)     # SSE
SSE = np.dot(e.T, e) # SSE
SSE = np.dot(Y.T, e) # SSE
SSE = np.dot(Y.T, Y) - np.dot(np.dot(b.T, X.T), Y) # SSE
SSR = np.dot(np.dot(Y.T, H - Jn * 1 / n), Y)           # SSR
SST = np.dot(np.dot(Y.T, In - Jn * 1 / n),Y) # SST
SST = SSR + SSE # SST
MSE = SSE / (n-k)                                      # MSE
MSR = SSR / (k-1) # MSR
MST = SST / (n-1) # MST

4. ANOVA

sm.stats.anova_lm(model, typ=1)
sm.stats.anova_lm(model, typ=2)
sm.stats.anova_lm(model, typ=3)

5. Inference

# rounding
np.round(<exp>, 2)
# p value
pval = (1 - <cdf>) # one tail
pval = (1 - <cdf>) * 2 # two tails
pval = (1 - <cdf.f>) # one tail F test
# hypothesis testing
stats.norm.ppf(1-<alpha>[/2])
stats.norm.cdf(<Z>)
stats.t.ppf(1-<alpha>[/2], <degreeoffreedom>)
stats.t.cdf(<T>, <degreeoffreedom>)
stats.f.ppf(1-<alpha>[/2], 1, n-2)              # SLR
stats.f.cdf(<F>, 1, n-2) # SLR
F = MSR/MSE
stats.f.ppf(1-<alpha>[/2], k-1, n-k)              # MLR
stats.f.cdf(<F>, k-1, n-k) # MLR
F = MSR/MSE
stats.chi2.ppf(1-<alpha>[/2], <degreeoffreedom>)
stats.chi2.cdf(<Χ_square>, <degreeoffreedom>)
# measure
R2 = SSR/SST
R2 = 1 - SSE/SST
R2 =model.rsquared
adjR2 = 1 - (SSE/(n-k)) / (SST/(n-1))
adjR2 = 1 - MSE / MST
adjR2 = 1 - (n-1)/(n-k)*(1-R2)
adjR2 = model.rsquared_adj