# T-distribution
# Part of the "Distribution series in Statistics".
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from scipy.stats import norm
# Read the workbook; the values analysed below live in its "data" column.
df = pd.read_excel('data.xlsx')

# Histogram of every value in the column, on its own 10x8 figure.
fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(df["data"], bins=30, color='green')

# Mean of the whole column. As a bare expression the result is only shown
# by an interactive session/notebook; a plain script discards it.
df["data"].mean()
# Sampling distribution of the mean: 1000 random samples of 50 rows each.
sample_number = 50
# Size the index range from the data itself instead of a hard-coded row
# count, so every row can be drawn and a different-length dataset still works.
population_size = len(df.data)
d = []
for _ in range(1000):
    # random.sample draws distinct row positions (no repeats within a sample).
    rows = random.sample(range(population_size), sample_number)
    d.append(np.mean(df.data.iloc[rows].to_numpy()))

# Mean of the 1000 sample means.
print(np.mean(d))

fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(d, bins=30, color='green')
# Repeat the experiment with a fresh set of 1000 samples of 50 rows each.
sample_number = 50
# Derive the number of rows from the data rather than hard-coding it, so the
# last row is eligible and the code does not depend on one specific file size.
population_size = len(df.data)
d = [
    np.mean(
        df.data.iloc[
            random.sample(range(population_size), sample_number)
        ].to_numpy()
    )
    for _ in range(1000)
]

# Mean of the 1000 sample means.
print(np.mean(d))

fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(d, bins=30, color='red')
# Sampling distribution of the mean with a larger sample: 1000 samples of
# 500 rows each.
sample_number = 500
# Use the real row count instead of a hard-coded bound so all rows can be
# sampled and other dataset sizes do not raise an out-of-bounds error.
population_size = len(df.data)
d = []
for _ in range(1000):
    rows = random.sample(range(population_size), sample_number)
    d.append(np.mean(df.data.iloc[rows].to_numpy()))

# Mean of the 1000 sample means.
print(np.mean(d))

# No new figure is created here, so the bars are drawn on whichever axes
# are current, which allows a direct comparison with the previous histogram.
plt.hist(d, bins=30, color='green')
def t_stat(x, mu=63.3):
    """Return the one-sample t-statistic of ``x`` against the mean ``mu``.

    The statistic is ``(mean(x) - mu) / (s / sqrt(n))`` where ``n = len(x)``
    and ``s`` is the sample standard deviation (``ddof=1``).

    Args:
        x: Sequence of numeric observations. At least two values are needed
            for ``s`` to be defined; with fewer, NumPy yields ``nan``.
        mu: Reference mean the sample mean is compared with. Defaults to
            63.3, the value that was previously hard-coded (presumably the
            population mean of the dataset -- confirm against the data).

    Returns:
        The t-statistic as a NumPy floating-point scalar.
    """
    n = len(x)
    # ddof=1 divides by n - 1: the t-statistic is defined with the sample
    # standard deviation, not the population one that np.std uses by default.
    sample_std = np.std(x, ddof=1)
    standard_error = sample_std / np.sqrt(n)
    return (np.mean(x) - mu) / standard_error
# Distribution of the t-statistic for 1000 random samples of 5 rows each.
sample_number = 5
# Take the row count from the data instead of a hard-coded bound, so every
# row can be drawn and the code works for datasets of any length.
population_size = len(df.data)
d = []
for _ in range(1000):
    rows = random.sample(range(population_size), sample_number)
    d.append(t_stat(df.data.iloc[rows].to_numpy()))

fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(d, bins=30, color='yellow')
ax.set_xlabel("t values")
ax.set_ylabel("f(t)")
# Distribution of the t-statistic for 1000 random samples of 10 rows each.
sample_number = 10
# Row positions are drawn from the actual length of the data rather than a
# hard-coded count, which could skip the last row or overflow a shorter file.
population_size = len(df.data)
d = [
    t_stat(
        df.data.iloc[
            random.sample(range(population_size), sample_number)
        ].to_numpy()
    )
    for _ in range(1000)
]

fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(d, bins=30, color='blue')
ax.set_xlabel("t values")
ax.set_ylabel("f(t)")
# Distribution of the t-statistic for 1000 random samples of 50 rows each.
sample_number = 50
# The population size comes from the data, not a hard-coded constant, so
# all rows are eligible and other dataset sizes are handled.
population_size = len(df.data)
d = []
for _ in range(1000):
    rows = random.sample(range(population_size), sample_number)
    d.append(t_stat(df.data.iloc[rows].to_numpy()))

fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(d, bins=30, color='green')
ax.set_xlabel("t values")
ax.set_ylabel("f(t)")
# First layer of a combined plot: t-statistics of 1000 samples of 5 rows,
# drawn on a new figure.
sample_number = 5
# Use the data's own length for the index range instead of a hard-coded
# bound, so no row is excluded and shorter datasets do not fail.
population_size = len(df.data)
d = []
for _ in range(1000):
    rows = random.sample(range(population_size), sample_number)
    d.append(t_stat(df.data.iloc[rows].to_numpy()))

fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(d, bins=30, color='yellow')
ax.set_xlabel("t values")
ax.set_ylabel("f(t)")
# t-statistics of 1000 random samples of 10 rows each.
sample_number = 10
# Index range sized from the data rather than a hard-coded row count, so the
# last row can be sampled and the dataset length is not assumed.
population_size = len(df.data)
d = []
for _ in range(1000):
    rows = random.sample(range(population_size), sample_number)
    d.append(t_stat(df.data.iloc[rows].to_numpy()))

# No new figure is created here: the histogram goes onto the current axes,
# layering it over whatever was plotted there before.
plt.hist(d, bins=30, color='blue')
plt.xlabel("t values")
plt.ylabel("f(t)")
# t-statistics of 1000 random samples of 50 rows each.
sample_number = 50
# Size the index range from the data instead of hard-coding it, so every
# row is eligible and other dataset lengths work unchanged.
population_size = len(df.data)
d = [
    t_stat(
        df.data.iloc[
            random.sample(range(population_size), sample_number)
        ].to_numpy()
    )
    for _ in range(1000)
]

# No new figure is created here: the histogram goes onto the current axes,
# layering it over whatever was plotted there before.
plt.hist(d, bins=30, color='green')
plt.xlabel("t values")
plt.ylabel("f(t)")
# t-statistics of 1000 random samples of 5 rows each, kept in d1 so they can
# be compared with the other sample sizes later.
sample_number = 5
# Take the row count from the data rather than a hard-coded bound, so all
# rows can be drawn and the dataset length is not assumed.
population_size = len(df.data)
d1 = []
for _ in range(1000):
    rows = random.sample(range(population_size), sample_number)
    d1.append(t_stat(df.data.iloc[rows].to_numpy()))

fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(d1, bins=30, color='yellow')
ax.set_xlabel("t values")
ax.set_ylabel("f(t)")
# t-statistics of 1000 random samples of 10 rows each, kept in d2 so they
# can be compared with the other sample sizes later.
sample_number = 10
# Index range sized from the data instead of a hard-coded row count, so the
# last row is not skipped and shorter datasets do not fail.
population_size = len(df.data)
d2 = []
for _ in range(1000):
    rows = random.sample(range(population_size), sample_number)
    d2.append(t_stat(df.data.iloc[rows].to_numpy()))

# No new figure is created here: the histogram goes onto the current axes,
# layering it over whatever was plotted there before.
plt.hist(d2, bins=30, color='green')
plt.xlabel("t values")
plt.ylabel("f(t)")
# t-statistics of 1000 random samples of 50 rows each, kept in d3 so they
# can be compared with the other sample sizes later.
sample_number = 50
# Use the data's own length instead of a hard-coded bound, so every row is
# eligible and other dataset sizes are handled.
population_size = len(df.data)
d3 = [
    t_stat(
        df.data.iloc[
            random.sample(range(population_size), sample_number)
        ].to_numpy()
    )
    for _ in range(1000)
]

# No new figure is created here: the histogram goes onto the current axes,
# layering it over whatever was plotted there before.
plt.hist(d3, bins=30, color='blue')
plt.xlabel("t values")
plt.ylabel("f(t)")
# Smoothed density curves of the t-statistics for the three sample sizes on
# one figure. The original called `sns.kdeplot`, but no `sns` name is
# imported in this file, so the estimate is computed with SciPy (already a
# dependency of this script) and drawn with matplotlib instead.
from scipy.stats import gaussian_kde

fig, ax = plt.subplots(figsize=(10, 8))
for t_values, label in (
    (d1, "sample size = 5"),
    (d2, "sample size = 10"),
    (d3, "sample size = 50"),
):
    values = np.asarray(t_values, dtype=float)
    # A sample with zero spread produces an infinite/NaN t-statistic, which
    # a density estimate cannot use; drop such entries.
    values = values[np.isfinite(values)]
    kde = gaussian_kde(values)  # default bandwidth: Scott's rule
    # Extend the grid three kernel widths past the data so the tails of the
    # curve taper off instead of being cut at the extreme observations.
    bandwidth = np.sqrt(kde.covariance[0, 0])
    grid = np.linspace(
        values.min() - 3 * bandwidth, values.max() + 3 * bandwidth, 200
    )
    ax.plot(grid, kde(grid), label=label)
ax.set_ylabel("Density")
ax.legend()