import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the sample to analyse from the 'Normal' sheet.  Earlier notes in this
# script list 'Skewed' and 'Bimodal' as alternative sheet names to try.
df = pd.read_excel(
    'Normal data.xlsx',
    sheet_name='Normal',
)

# Histogram of the raw observations (40 bins) on a fresh 10x8 figure.
fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(sorted(df['data']), bins=40)
(array([ 1.,  4.,  1.,  3.,  6., 14.,  5.,  9., 16., 20., 26., 28., 39.,
        44., 61., 68., 58., 75., 91., 89., 63., 57., 74., 68., 73., 64.,
        59., 44., 36., 22., 22., 10., 15.,  7.,  7., 10.,  3.,  3.,  3.,
         2.]),
 array([20.16  , 20.4015, 20.643 , 20.8845, 21.126 , 21.3675, 21.609 ,
        21.8505, 22.092 , 22.3335, 22.575 , 22.8165, 23.058 , 23.2995,
        23.541 , 23.7825, 24.024 , 24.2655, 24.507 , 24.7485, 24.99  ,
        25.2315, 25.473 , 25.7145, 25.956 , 26.1975, 26.439 , 26.6805,
        26.922 , 27.1635, 27.405 , 27.6465, 27.888 , 28.1295, 28.371 ,
        28.6125, 28.854 , 29.0955, 29.337 , 29.5785, 29.82  ]),
 <BarContainer object of 40 artists>)
# Mean and standard deviation of the raw sample (pandas defaults).
m = df.data.mean()
st = df.data.std()

# Standardize the data: replace every observation with its z-score.
# This is done as one vectorized column assignment.  The previous per-row
# `df.data.iloc[i] = ...` was a chained-indexing assignment: it emits
# chained-assignment warnings and, with pandas Copy-on-Write enabled, leaves
# `df` unmodified, so the later plots would silently use unstandardized data.
df['data'] = (df['data'] - m) / st
# Reference sample whose empirical quantiles serve as the x-axis of the
# Q-Q plot drawn further down.
dfn = pd.read_excel('Normal data.xlsx', sheet_name = 'Standard')

# One reference quantile per observation in `df`, taken at the cumulative
# probabilities 1/n, 2/n, ..., 1.  The count and the denominator both come
# from the same n (the size of `df`).  The earlier loop iterated over the rows
# of `dfn` while dividing by the rows of `df`; with sheets of different sizes
# that yields probabilities above 1 (np.quantile raises) or a `q` whose length
# does not match the sorted sample it is plotted against.
n = df.shape[0]
probabilities = np.arange(1, n + 1) / n

# np.quantile accepts an array of probabilities, so all quantiles come from a
# single call; list() keeps `q` a plain list as before.
q = list(np.quantile(dfn['data'], probabilities))
# Histogram of the reference sample (40 bins) on its own 10x8 figure.
fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(sorted(dfn['data']), bins=40)
(array([ 2.,  5.,  3.,  6.,  6.,  7., 10.,  9., 24., 24., 39., 37., 54.,
        52., 51., 54., 67., 89., 69., 82., 73., 51., 64., 63., 79., 50.,
        36., 48., 29., 24., 26., 14., 12., 11., 10.,  5.,  7.,  3.,  2.,
         3.]),
 array([-2.85709987, -2.71208322, -2.56706657, -2.42204992, -2.27703326,
        -2.13201661, -1.98699996, -1.84198331, -1.69696666, -1.55195001,
        -1.40693336, -1.26191671, -1.11690006, -0.97188341, -0.82686676,
        -0.68185011, -0.53683346, -0.3918168 , -0.24680015, -0.1017835 ,
         0.04323315,  0.1882498 ,  0.33326645,  0.4782831 ,  0.62329975,
         0.7683164 ,  0.91333305,  1.0583497 ,  1.20336635,  1.348383  ,
         1.49339966,  1.63841631,  1.78343296,  1.92844961,  2.07346626,
         2.21848291,  2.36349956,  2.50851621,  2.65353286,  2.79854951,
         2.94356616]),
 <BarContainer object of 40 artists>)
# Hand-built Q-Q plot: reference quantiles on x, sorted sample values on y,
# drawn as unconnected circle markers.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(q, sorted(df['data']), 'o')
ax.set_xlabel("Quantile of standard normal distribution")
ax.set_ylabel("Sample Z-score")
ax.grid()
# Same comparison using statsmodels' built-in Q-Q plot, with a 45-degree
# reference line drawn for visual comparison.
import statsmodels.api as sm
sm.qqplot(data=df['data'], line='45')