import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
import statsmodels.api as sm
import pylab as py
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
url = "Car/toyota.csv"
data = pd.read_csv(url)
data.head()
model year price transmission mileage fuelType tax mpg engineSize
0 GT86 2016 16000 Manual 24089 Petrol 265 36.2 2.0
1 GT86 2017 15995 Manual 18615 Petrol 145 36.2 2.0
2 GT86 2015 13998 Manual 27469 Petrol 265 36.2 2.0
3 GT86 2017 18998 Manual 14736 Petrol 150 36.2 2.0
4 GT86 2017 17498 Manual 36284 Petrol 145 36.2 2.0
data.shape
(6738, 10)

Stepwise rewgression

num = ['mileage', 'tax', 'mpg', 'engineSize']
# num = ['mileage', 'tax', 'mpg', 'engineSize',  'engineSize2']
num = ['mileage', 'tax', 'mpg', 'engineSize']
for i in range(0,len(num),1):
    x = pd.DataFrame(data[num[0:i+1]])
    y = data['price']
    model = sm.OLS(y, x).fit()

    print("With predictor {}".format(num[0:i+1]))
    print("R-squared = ", model.rsquared)
    print("Adj R-squared =", model.rsquared_adj)
    print("AIC =", model.aic)
    print("BIC =", model.bic)
    print("\n")
With predictor ['mileage']
R-squared =  0.3573100248074934
Adj R-squared = 0.3572146277501693
AIC = 144834.10437945105
BIC = 144840.91989787502


With predictor ['mileage', 'tax']
R-squared =  0.6362397640051107
Adj R-squared = 0.6361317591844472
AIC = 141001.0537912962
BIC = 141014.68482814415


With predictor ['mileage', 'tax', 'mpg']
R-squared =  0.7965603667829384
Adj R-squared = 0.7964697477926412
AIC = 137087.42922847485
BIC = 137107.87578374674


With predictor ['mileage', 'tax', 'mpg', 'engineSize']
R-squared =  0.9423743984966928
Adj R-squared = 0.9423401688551702
AIC = 128590.10000072751
BIC = 128617.36207442338


model.summary()
OLS Regression Results
Dep. Variable: price R-squared (uncentered): 0.942
Model: OLS Adj. R-squared (uncentered): 0.942
Method: Least Squares F-statistic: 2.753e+04
Date: Fri, 15 Apr 2022 Prob (F-statistic): 0.00
Time: 11:16:33 Log-Likelihood: -64291.
No. Observations: 6738 AIC: 1.286e+05
Df Residuals: 6734 BIC: 1.286e+05
Df Model: 4
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
mileage -0.1522 0.002 -68.454 0.000 -0.157 -0.148
tax -1.6673 0.562 -2.967 0.003 -2.769 -0.566
mpg -1.7408 1.664 -1.046 0.295 -5.002 1.520
engineSize 1.112e+04 85.197 130.535 0.000 1.1e+04 1.13e+04
Omnibus: 2229.863 Durbin-Watson: 0.914
Prob(Omnibus): 0.000 Jarque-Bera (JB): 11951.306
Skew: 1.492 Prob(JB): 0.00
Kurtosis: 8.802 Cond. No. 6.18e+04


Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The condition number is large, 6.18e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

Use "RegscorePy"

y_pred = model.predict(x)
from RegscorePy import *

AIC

p = 4
y = y
aic.aic(y, y_pred, p) # using RegscorePy
109256.10676454737
n=6738
P=4
n*np.log(model.ssr) - n*np.log(n) + 2*P # using formula
109256.10676454735

BIC

p = 4
y = y
bic.bic(y, y_pred, p) # using RegscorePy
109283.36883824323
n=6738
P=4
n*np.log(model.ssr) - n*np.log(n) + P*np.log(n)  # using formula
109283.36883824322

Weighted Regression

data[num]
mileage tax mpg engineSize engineSize2
0 24089 265 36.2 2.0 4.0
1 18615 145 36.2 2.0 4.0
2 27469 265 36.2 2.0 4.0
3 14736 150 36.2 2.0 4.0
4 36284 145 36.2 2.0 4.0
... ... ... ... ... ...
6733 30000 20 58.9 1.0 3.0
6734 36154 125 50.4 1.3 3.3
6735 46000 125 57.6 1.4 3.4
6736 60700 125 50.4 1.3 3.3
6737 45128 125 50.4 1.3 3.3

6738 rows × 5 columns

data.mpg.min()
2.8
data[num].sort_values('mpg').head(50)
mileage tax mpg engineSize engineSize2
6562 50 260 2.8 2.4 4.4
6552 3350 260 2.8 2.4 4.4
6594 1259 265 2.8 2.4 4.4
6595 10250 260 2.8 2.4 4.4
6617 150 260 2.8 2.4 4.4
6575 7123 260 2.8 2.4 4.4
6596 5000 260 2.8 2.4 4.4
6563 100 260 2.8 2.4 4.4
6583 4 265 2.8 2.4 4.4
6576 5190 260 2.8 2.4 4.4
5966 16429 145 6.0 1.2 3.2
6694 100000 265 23.9 4.2 6.2
6693 143000 325 27.2 3.0 5.0
6698 60000 540 29.7 4.5 6.5
6667 6254 145 30.1 2.8 4.8
6680 789 145 30.1 2.8 4.8
6677 22845 150 30.1 2.8 4.8
6649 11712 145 30.1 2.8 4.8
6657 10083 145 30.1 2.8 4.8
6658 11619 145 30.1 2.8 4.8
6682 3500 145 30.1 2.8 4.8
6684 4 145 30.1 2.8 4.8
6687 27 145 30.1 2.8 4.8
6688 1244 145 30.1 2.8 4.8
6689 4512 145 30.1 2.8 4.8
6653 12543 145 30.1 2.8 4.8
6695 4000 150 30.1 2.8 4.8
6699 6836 145 30.1 2.8 4.8
6651 200 150 30.1 2.8 4.8
6675 15200 145 30.1 2.8 4.8
6659 16634 145 30.1 2.8 4.8
6672 3104 145 30.1 2.8 4.8
6663 8967 150 30.1 2.8 4.8
6666 11404 145 30.1 2.8 4.8
6661 3390 145 30.1 2.8 4.8
6670 1000 145 30.1 2.8 4.8
6671 8813 145 30.1 2.8 4.8
799 45229 330 30.4 2.0 4.0
6409 44000 325 30.7 2.0 4.0
6690 80750 325 31.0 3.0 5.0
6696 113000 555 31.4 3.0 5.0
761 61000 330 31.4 2.0 4.0
6691 160000 325 31.4 3.0 5.0
6686 174419 565 31.4 3.0 5.0
785 116000 330 31.4 2.0 4.0
776 140000 325 32.1 2.0 4.0
763 113000 325 32.1 2.0 4.0
6556 30848 260 32.8 3.0 5.0
70 4000 145 32.8 2.0 4.0
6629 92007 260 32.8 3.0 5.0
data["mpg_weighted"] = data['mpg'] - data.mpg.min()
data.sort_values('mpg').head(50)
model year price transmission mileage fuelType tax mpg engineSize engineSize2 mpg_weighted
6562 Hilux 2020 18495 Manual 50 Diesel 260 2.8 2.4 4.4 0.0
6552 Hilux 2019 28495 Automatic 3350 Diesel 260 2.8 2.4 4.4 0.0
6594 Hilux 2020 39257 Automatic 1259 Diesel 265 2.8 2.4 4.4 0.0
6595 Hilux 2019 27850 Automatic 10250 Diesel 260 2.8 2.4 4.4 0.0
6617 Hilux 2020 36995 Automatic 150 Diesel 260 2.8 2.4 4.4 0.0
6575 Hilux 2019 26500 Automatic 7123 Diesel 260 2.8 2.4 4.4 0.0
6596 Hilux 2020 30500 Automatic 5000 Diesel 260 2.8 2.4 4.4 0.0
6563 Hilux 2020 23495 Manual 100 Diesel 260 2.8 2.4 4.4 0.0
6583 Hilux 2019 26995 Automatic 4 Diesel 265 2.8 2.4 4.4 0.0
6576 Hilux 2019 20500 Manual 5190 Diesel 260 2.8 2.4 4.4 0.0
5966 C-HR 2018 16690 Manual 16429 Petrol 145 6.0 1.2 3.2 3.2
6694 Land Cruiser 1998 19990 Manual 100000 Diesel 265 23.9 4.2 6.2 21.1
6693 Land Cruiser 2004 6450 Automatic 143000 Diesel 325 27.2 3.0 5.0 24.4
6698 Land Cruiser 2014 44990 Automatic 60000 Diesel 540 29.7 4.5 6.5 26.9
6667 Land Cruiser 2019 50995 Semi-Auto 6254 Diesel 145 30.1 2.8 4.8 27.3
6680 Land Cruiser 2019 48995 Automatic 789 Diesel 145 30.1 2.8 4.8 27.3
6677 Land Cruiser 2019 42990 Semi-Auto 22845 Diesel 150 30.1 2.8 4.8 27.3
6649 Land Cruiser 2019 39498 Semi-Auto 11712 Diesel 145 30.1 2.8 4.8 27.3
6657 Land Cruiser 2019 42444 Semi-Auto 10083 Diesel 145 30.1 2.8 4.8 27.3
6658 Land Cruiser 2019 40999 Semi-Auto 11619 Diesel 145 30.1 2.8 4.8 27.3
6682 Land Cruiser 2020 50995 Automatic 3500 Diesel 145 30.1 2.8 4.8 27.3
6684 Land Cruiser 2020 47885 Automatic 4 Diesel 145 30.1 2.8 4.8 27.3
6687 Land Cruiser 2020 45950 Automatic 27 Diesel 145 30.1 2.8 4.8 27.3
6688 Land Cruiser 2020 52990 Automatic 1244 Diesel 145 30.1 2.8 4.8 27.3
6689 Land Cruiser 2019 44995 Automatic 4512 Diesel 145 30.1 2.8 4.8 27.3
6653 Land Cruiser 2019 39498 Semi-Auto 12543 Diesel 145 30.1 2.8 4.8 27.3
6695 Land Cruiser 2020 54550 Automatic 4000 Diesel 150 30.1 2.8 4.8 27.3
6699 Land Cruiser 2019 49995 Automatic 6836 Diesel 145 30.1 2.8 4.8 27.3
6651 Land Cruiser 2020 44995 Semi-Auto 200 Diesel 150 30.1 2.8 4.8 27.3
6675 Land Cruiser 2019 44935 Semi-Auto 15200 Diesel 145 30.1 2.8 4.8 27.3
6659 Land Cruiser 2019 42995 Semi-Auto 16634 Diesel 145 30.1 2.8 4.8 27.3
6672 Land Cruiser 2020 52291 Semi-Auto 3104 Diesel 145 30.1 2.8 4.8 27.3
6663 Land Cruiser 2019 45995 Semi-Auto 8967 Diesel 150 30.1 2.8 4.8 27.3
6666 Land Cruiser 2019 40995 Semi-Auto 11404 Diesel 145 30.1 2.8 4.8 27.3
6661 Land Cruiser 2020 50995 Semi-Auto 3390 Diesel 145 30.1 2.8 4.8 27.3
6670 Land Cruiser 2019 54991 Semi-Auto 1000 Diesel 145 30.1 2.8 4.8 27.3
6671 Land Cruiser 2019 47795 Semi-Auto 8813 Diesel 145 30.1 2.8 4.8 27.3
799 RAV4 2004 5495 Automatic 45229 Petrol 330 30.4 2.0 4.0 27.6
6409 Avensis 2004 3495 Automatic 44000 Petrol 325 30.7 2.0 4.0 27.9
6690 Land Cruiser 2005 8000 Manual 80750 Diesel 325 31.0 3.0 5.0 28.2
6696 Land Cruiser 2006 7240 Automatic 113000 Diesel 555 31.4 3.0 5.0 28.6
761 RAV4 2008 5195 Automatic 61000 Petrol 330 31.4 2.0 4.0 28.6
6691 Land Cruiser 2004 5975 Automatic 160000 Diesel 325 31.4 3.0 5.0 28.6
6686 Land Cruiser 2008 6950 Automatic 174419 Diesel 565 31.4 3.0 5.0 28.6
785 RAV4 2008 4480 Automatic 116000 Petrol 330 31.4 2.0 4.0 28.6
776 RAV4 2002 1600 Manual 140000 Petrol 325 32.1 2.0 4.0 29.3
763 RAV4 2005 2394 Manual 113000 Petrol 325 32.1 2.0 4.0 29.3
6556 Hilux 2016 19498 Automatic 30848 Diesel 260 32.8 3.0 5.0 30.0
70 GT86 2019 24990 Automatic 4000 Petrol 145 32.8 2.0 4.0 30.0
6629 Hilux 2013 12000 Automatic 92007 Diesel 260 32.8 3.0 5.0 30.0
num = ['mileage', 'tax', 'mpg', 'engineSize']
# num = ['mileage', 'tax', 'mpg', 'engineSize', 'mpg_weighted']
num = ['mileage', 'tax', 'mpg', 'engineSize']
for i in range(0,len(num),1):
    x = pd.DataFrame(data[num[0:i+1]])
    y = data['price']
    model = linear_model.LinearRegression()
    model.fit(x, y)
model.coef_
array([-1.47097078e-01,  2.71207473e+00,  3.22056993e+01,  1.18159759e+04])
for i in range(0,len(num),1):
    x = pd.DataFrame(data[num[0:i+1]])
    y = data['price']
    
    model = linear_model.LinearRegression()
    model.fit(x, y, sample_weight = data['mpg_weighted'])
model.coef_
array([-1.34164121e-01,  5.10535692e+00,  4.22850836e+01,  1.15948701e+04])