import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.api as sm
import statsmodels.formula.api as smf
import pylab as py

# Load the salary dataset. The columns used in this section are
# YearsExperience, Projects, People_managing and Salary.
df = pd.read_csv("salary.csv")
df  # notebook-style display of the frame; has no effect in a plain script

# Scatter each candidate predictor against Salary, one figure per predictor.
# Every figure gets both axis labels so the plots can be told apart once
# rendered (previously only the YearsExperience figure was labelled).
for predictor in ('People_managing', 'Projects', 'YearsExperience'):
    # `fig` is rebound on every pass; after the loop it refers to the
    # YearsExperience figure, the last one created.
    fig = plt.figure(figsize=(15, 10))
    plt.plot(df[predictor], df['Salary'], 'o')
    plt.xlabel(predictor)
    plt.ylabel('Salary')
    plt.grid()

# Fit an ordinary-least-squares regression of Salary on all three predictors
# and print the fitted summary table. `model` ends up bound to the fitted
# results object, not the unfitted model specification.
#
# Alternatives tried during exploration (kept for reference):
#   formula = 'Salary ~ YearsExperience'
#   formula = 'Salary ~ Projects'
#   .fit(cov_type="hc0")
#   model.predict(y)
model = smf.ols(
    formula='Salary ~ Projects + People_managing + YearsExperience',
    data=df,
).fit()

summary_table = model.summary()
print(summary_table)

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 Salary   R-squared:                       0.963
Model:                            OLS   Adj. R-squared:                  0.959
Method:                 Least Squares   F-statistic:                     235.6
Date:                Thu, 21 Apr 2022   Prob (F-statistic):           1.82e-19
Time:                        18:23:38   Log-Likelihood:                -310.21
No. Observations:                  31   AIC:                             628.4
Df Residuals:                      27   BIC:                             634.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===================================================================================
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept        2.567e+04   2221.384     11.556      0.000    2.11e+04    3.02e+04
Projects          333.4580    282.859      1.179      0.249    -246.920     913.836
People_managing -2447.4776   2426.626     -1.009      0.322   -7426.503    2531.548
YearsExperience  9633.2604   1210.014      7.961      0.000    7150.516    1.21e+04
==============================================================================
Omnibus:                        2.060   Durbin-Watson:                   1.958
Prob(Omnibus):                  0.357   Jarque-Bera (JB):                1.859
Skew:                           0.555   Prob(JB):                        0.395
Kurtosis:                       2.545   Cond. No.                         56.0
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

	YearsExperience	Projects	People_managing	Salary
0	1.1	4	0	39343
1	1.3	5	0	46205
2	1.5	6	0	37731
3	2.0	3	1	43525
4	2.2	5	1	39891
5	2.9	6	1	56642
6	3.0	8	1	60150
7	3.2	7	1	54445
8	3.2	9	2	64445
9	3.7	10	2	57189
10	3.9	15	2	63218
11	4.0	12	2	55794
12	4.0	7	2	56957
13	4.1	22	3	57081
14	4.5	12	3	61111
15	4.9	20	3	67938
16	5.1	21	3	66029
17	5.3	16	3	83088
18	5.9	28	4	81363
19	6.0	23	4	93940
20	6.8	25	4	91738
21	7.1	22	4	98273
22	7.9	35	4	101302
23	8.2	31	4	113812
24	8.7	27	4	109431
25	9.0	24	5	105582
26	9.5	33	5	116969
27	9.6	30	5	112635
28	10.3	29	5	122391
29	10.5	31	5	121872
30	11.0	28	5	126522