import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pylab as py
# Load the salary dataset from the working directory. The rest of the script
# reads the columns YearsExperience, Projects, People_managing and Salary.
df = pd.read_csv(filepath_or_buffer="salary.csv")
# Bare expression: renders the frame when run as a notebook cell; it has no
# effect when the file is executed as a plain script.
df
YearsExperience Projects People_managing Salary
0 1.1 4 0 39343
1 1.3 5 0 46205
2 1.5 6 0 37731
3 2.0 3 1 43525
4 2.2 5 1 39891
5 2.9 6 1 56642
6 3.0 8 1 60150
7 3.2 7 1 54445
8 3.2 9 2 64445
9 3.7 10 2 57189
10 3.9 15 2 63218
11 4.0 12 2 55794
12 4.0 7 2 56957
13 4.1 22 3 57081
14 4.5 12 3 61111
15 4.9 20 3 67938
16 5.1 21 3 66029
17 5.3 16 3 83088
18 5.9 28 4 81363
19 6.0 23 4 93940
20 6.8 25 4 91738
21 7.1 22 4 98273
22 7.9 35 4 101302
23 8.2 31 4 113812
24 8.7 27 4 109431
25 9.0 24 5 105582
26 9.5 33 5 116969
27 9.6 30 5 112635
28 10.3 29 5 122391
29 10.5 31 5 121872
30 11.0 28 5 126522
# Scatter each candidate predictor against Salary, one figure per predictor.
# Every figure carries its own axis labels so the three plots remain
# distinguishable when viewed or saved individually.
for predictor in ('People_managing', 'Projects', 'YearsExperience'):
    # `fig` is rebound on each pass; after the loop it refers to the last
    # (YearsExperience) figure.
    fig = plt.figure(figsize=(15, 10))
    plt.plot(df[predictor], df['Salary'], 'o')
    plt.xlabel(predictor)
    plt.ylabel('Salary')
    plt.grid()
# Ordinary least squares regression of Salary on all three predictors.
# Single-predictor variants that were also tried can be reproduced by swapping
# the formula for 'Salary ~ YearsExperience' or 'Salary ~ Projects'; an
# alternative covariance estimator was tried via .fit(cov_type="hc0").
# NOTE: `model` is bound to the *fitted results* object (the return value of
# .fit()), not to the unfitted model specification.
model = smf.ols(
    formula='Salary ~ Projects + People_managing + YearsExperience',
    data=df,
).fit()
# Print the full regression report (coefficients, standard errors, fit
# statistics and residual diagnostics).
print(model.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 Salary   R-squared:                       0.963
Model:                            OLS   Adj. R-squared:                  0.959
Method:                 Least Squares   F-statistic:                     235.6
Date:                Thu, 21 Apr 2022   Prob (F-statistic):           1.82e-19
Time:                        18:23:38   Log-Likelihood:                -310.21
No. Observations:                  31   AIC:                             628.4
Df Residuals:                      27   BIC:                             634.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===================================================================================
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept        2.567e+04   2221.384     11.556      0.000    2.11e+04    3.02e+04
Projects          333.4580    282.859      1.179      0.249    -246.920     913.836
People_managing -2447.4776   2426.626     -1.009      0.322   -7426.503    2531.548
YearsExperience  9633.2604   1210.014      7.961      0.000    7150.516    1.21e+04
==============================================================================
Omnibus:                        2.060   Durbin-Watson:                   1.958
Prob(Omnibus):                  0.357   Jarque-Bera (JB):                1.859
Skew:                           0.555   Prob(JB):                        0.395
Kurtosis:                       2.545   Cond. No.                         56.0
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.