Generalized Additive Models
Using pyGAM
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.metrics import r2_score
from pygam import LinearGAM, s, f, l, te
from pygam.datasets import wage
def partialResidualPlot(model, df, outcome, feature, ax):
    """Draw a partial residual plot for one feature of a fitted model.

    The feature's contribution is obtained by predicting on a copy of ``df``
    in which every other column is set to 0.0 and subtracting the model's
    first parameter (assumed to be the intercept -- confirm for models that
    are not fitted through a formula with an intercept term).

    Parameters
    ----------
    model : fitted results object exposing ``predict(frame)`` and ``params``.
    df : pandas.DataFrame holding the outcome and the predictor columns.
    outcome : name of the outcome column in ``df``.
    feature : name of the predictor column to plot.
    ax : axes-like object providing ``scatter``, ``plot``, ``set_xlabel`` and
        ``set_ylabel``.

    Returns
    -------
    The same ``ax`` that was passed in, after drawing on it.
    """
    y_pred = model.predict(df)

    # Blank out every column except the one of interest, so the second
    # prediction depends on that feature alone.
    copy_df = df.copy()
    for c in copy_df.columns:
        if c != feature:
            copy_df[c] = 0.0
    feature_prediction = model.predict(copy_df)

    # Read the first parameter by position. ``params[0]`` on a label-indexed
    # Series relies on a deprecated integer-key fallback in pandas.
    first_param = np.asarray(model.params)[0]

    results = pd.DataFrame({
        'feature': df[feature],
        'residual': df[outcome] - y_pred,
        'ypartial': feature_prediction - first_param,
    })
    # Sort so the contribution curve is drawn as one left-to-right line.
    results = results.sort_values(by=['feature'])

    ax.scatter(results.feature, results.ypartial + results.residual)
    ax.plot(results.feature, results.ypartial, color='black')
    ax.set_xlabel(feature)
    ax.set_ylabel(f'Residual + {feature} contribution')
    return ax
# Load the fish measurements and scatter Weight against Width.
# NOTE(review): `plt` is used below but no matplotlib import is visible in
# this file -- confirm `matplotlib.pyplot` is imported as `plt` somewhere.
data = pd.read_csv("Fish.csv")
data  # bare expression: displays the frame in a notebook, no effect in a script

fig, ax = plt.subplots(figsize=(10, 8))
y = data["Weight"]
x = data["Width"]
plt.scatter(x, y)
plt.ylabel("weight of fish in gram")
plt.xlabel("diagonal width in cm")
plt.grid()
# Ordinary least squares with a linear and a squared Width term.
poly_formula = 'Weight ~ Width +I(Width**2)'
result_poly = smf.ols(poly_formula, data=data).fit()
print(result_poly.summary())

fig, ax = plt.subplots(figsize=(10, 8))
pred_poly = result_poly.predict(data.Width)

# Raw observations as markers.
plt.plot(data.Width, data.Weight, 'o')
plt.xlabel('Width')
plt.ylabel('Weight')

# Pair each Width with its fitted value and order by Width so the fitted
# curve is drawn as a single left-to-right line.
pfit = pd.DataFrame({'Width': data.Width, 'Pred': pred_poly})
pfit = pfit.sort_values(by=['Width'])
plt.plot(pfit.Width, pfit.Pred)
plt.grid()
# OLS on a B-spline basis of Width (df=3, degree=2), then a partial
# residual plot of the fitted Width contribution.
formula = 'Weight ~ bs(Width, df=3, degree=2)'
model_spline = smf.ols(formula, data=data)
result_spline = model_spline.fit()
print(result_spline.summary())

fig, ax = plt.subplots(figsize=(10, 8))
partialResidualPlot(result_spline, data, 'Weight', 'Width', ax)
plt.tight_layout()
plt.grid()
# These calls replace the axis labels set inside partialResidualPlot.
plt.xlabel('Width')
plt.ylabel('Weight')
data  # bare expression: displays the frame in a notebook, no effect in a script

# Fit a pyGAM LinearGAM with a single linear term on column 0 (Width).
predictors = ['Width']
outcome = ['Weight']
x = data[predictors].values
y = data[outcome]
gam = LinearGAM(l(0))
gam.gridsearch(x, y)

# Plot the fitted curve and its 95% prediction interval over a grid of
# Width values, with the training points underneath.
fig, ax = plt.subplots(figsize=(10, 8))
XX = gam.generate_X_grid(term=0)
plt.plot(XX, gam.predict(XX), 'r--')
plt.plot(XX, gam.prediction_intervals(XX, width=.95), color='b', ls='--')
# The original scattered `X`, which is never defined (NameError); the
# training predictor matrix is `x`.
plt.scatter(x, y, facecolor='gray', edgecolors='none')
# Fit a pyGAM LinearGAM with a spline term on column 0 (Width), using
# 20 splines.
predictors = ['Width']
outcome = ['Weight']
x = data[predictors].values
y = data[outcome]
gam = LinearGAM(s(0, n_splines=20))
gam.gridsearch(x, y)

# Plot the fitted curve and its 95% prediction interval over a grid of
# Width values, with the training points underneath.
fig, ax = plt.subplots(figsize=(10, 8))
XX = gam.generate_X_grid(term=0)
plt.plot(XX, gam.predict(XX), 'r--')
plt.plot(XX, gam.prediction_intervals(XX, width=.95), color='b', ls='--')
# The original scattered `X`, which is never defined (NameError); the
# training predictor matrix is `x`.
plt.scatter(x, y, facecolor='gray', edgecolors='none')