# Spline Regression
# Using the statsmodels library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.metrics import r2_score
import datetime as dt
# Synthetic data: sin(4*pi*x) sampled at 100 evenly spaced points on [0, 1],
# plus Gaussian noise scaled by 0.5. No random seed is set, so every run
# draws different noise.
x = np.linspace(0, 1, 100)
y = np.sin(4 * np.pi * x)
noise = 0.5 * np.random.normal(size=x.size)
y = y + noise
plt.scatter(x, y)

# Build the frame directly from the arrays. Creating an empty frame and then
# assigning through attribute access (df.x = ...) only works when the column
# already exists; otherwise it silently sets a plain Python attribute.
df = pd.DataFrame({'x': x, 'y': y})
df  # bare expression: shows the frame in an interactive cell, no-op in a script

# OLS on a B-spline basis of x, as requested in the formula: degree=1
# (piecewise linear) with df=3 basis columns.
formula = 'y ~ bs(x, df=3, degree=1)'
model_spline = smf.ols(formula=formula, data=df)
result_spline = model_spline.fit()
print(result_spline.summary())

# NOTE(review): partialResidualPlot is defined further down in this file; it
# must already be defined (e.g. its notebook cell executed) before this runs.
fig, ax = plt.subplots(figsize=(5, 5))
partialResidualPlot(result_spline, df, 'y', 'x', ax)
plt.tight_layout()
plt.show()
def partialResidualPlot(model, df, outcome, feature, ax):
    """Draw a partial residual plot for one feature of a fitted model.

    The feature's contribution is estimated by predicting on a copy of
    ``df`` in which every column except ``feature`` is set to 0.0, then
    subtracting the model's first parameter (assumed to be the intercept --
    confirm for models fitted without one). The points drawn are that
    contribution plus the model residuals; the contribution itself is drawn
    as a black line.

    Args:
        model: Fitted results object exposing ``predict(frame)`` and
            ``params``.
        df: DataFrame containing at least the ``outcome`` and ``feature``
            columns; it is not modified.
        outcome: Name of the response column in ``df``.
        feature: Name of the column whose contribution is plotted.
        ax: Axes-like object to draw on (``scatter``, ``plot``,
            ``set_xlabel`` and ``set_ylabel`` are called on it).

    Returns:
        The same ``ax`` that was passed in.
    """
    y_pred = model.predict(df)

    # Neutralise every other column (including the outcome) on a copy so
    # that the second prediction varies only with the chosen feature.
    copy_df = df.copy()
    for c in copy_df.columns:
        if c != feature:
            copy_df[c] = 0.0
    feature_prediction = model.predict(copy_df)

    # Take the first parameter by position. ``params`` is typically a
    # label-indexed Series, where ``params[0]`` would be a label lookup
    # (deprecated as a positional access); going through an array works
    # for both Series and ndarray parameters.
    first_param = np.asarray(model.params)[0]

    results = pd.DataFrame({
        'feature': df[feature],
        'residual': df[outcome] - y_pred,
        'ypartial': feature_prediction - first_param,
    })
    # Sort by the feature so the contribution line is drawn left to right.
    results = results.sort_values(by=['feature'])

    ax.scatter(results.feature, results.ypartial + results.residual)
    ax.plot(results.feature, results.ypartial, color='black')
    ax.set_xlabel(feature)
    ax.set_ylabel(f'Residual + {feature} contribution')
    return ax