pwd

'd:\\python\\exerise-df\\df-data-analysis'

from scipy import stats
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt

单因素方差分析

# Load the one-way ANOVA data set; the relative path is resolved against the
# current working directory.
dat = pd.read_csv("one-way.csv")
# Preview the first rows (shown as cell output when run in a notebook).
dat.head()



  Variety rep     y
0       A  b1  15.3
1       B  b1  18.0
2       C  b1  16.6
3       D  b1  16.4
4       E  b1  13.7


# One-way ANOVA: fit an OLS model with Variety as the only factor, then
# build the ANOVA table for that fit and print it.
model = ols(formula='y ~ Variety', data=dat).fit()
anovat = anova_lm(model)
print(anovat)
            df     sum_sq    mean_sq          F        PR(>F)
Variety    5.0  52.378333  10.475667  40.334118  3.662157e-09
Residual  18.0   4.675000   0.259722        NaN           NaN

二因素方差分析

# Load the two-factor data set. This rebinds `dat`, so any previously loaded
# frame under that name is no longer reachable through it.
dat = pd.read_csv("anova.csv")
# Preview the first rows (shown as cell output when run in a notebook).
dat.head()



   loc   cul      y
0  Ann  BH93  4.460
1  Ari  BH93  4.417
2  Aug  BH93  4.669
3  Cas  BH93  4.732
4  Del  BH93  4.390


# Two-factor ANOVA with additive main effects only: the formula joins `loc`
# and `cul` with `+`, so no interaction term is fitted.
formula = 'y~ loc + cul'
anova_results = anova_lm(ols(formula=formula, data=dat).fit())
print(anova_results)
             df      sum_sq    mean_sq          F        PR(>F)
loc        17.0   22.671174   1.333598   9.087496  2.327448e-15
cul         8.0  114.536224  14.317028  97.560054  1.611882e-52
Residual  136.0   19.958126   0.146751        NaN           NaN