Python 3 学习机器学习 API
使用了三种集成回归模型
git: https://github.com/linyi0604/MachineLearning
代码:
import numpy as np
# NOTE: load_boston was removed in scikit-learn 1.2, so this script needs an
# older scikit-learn release to run.
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler

try:
    # train_test_split lives in model_selection since scikit-learn 0.18;
    # the cross_validation module was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
7
'''
本脚本比较三种集成回归模型:
随机森林回归
极端随机森林回归
梯度提升回归

通常集成模型能够取得非常好的表现
'''
15
# 1 Prepare the data
# Load the Boston housing price dataset.
boston = load_boston()
# Author's notes on the dataset (from boston.DESCR and a quick look at
# boston.target): 506 records, each with 13 numeric features plus the target
# price; target max 50, min 5, mean about 22.53.

x, y = boston.data, boston.target

# 2 Split into training and test data
# A random 25% of the samples is held out for testing and the remaining 75%
# is used for training; random_state is pinned to 33.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=33, test_size=0.25)
32
# 3 Standardize the training and test data
# Each scaler is fitted on the training split only and then applied to both
# splits.
ss_x = StandardScaler().fit(x_train)
x_train, x_test = ss_x.transform(x_train), ss_x.transform(x_test)

# The targets are reshaped to single-column 2-D arrays before being handed to
# the scaler, so y_train and y_test end up with shape (n, 1).
ss_y = StandardScaler().fit(y_train.reshape(-1, 1))
y_train = ss_y.transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))
41
# 4 Train the three ensemble regressors and predict on the test set.
# y_train was reshaped to a single column for StandardScaler; the regressors
# expect a 1-D target (a column vector triggers a DataConversionWarning), so
# flatten it once and reuse it for every fit.
y_train_1d = y_train.ravel()

# Random forest regression
rfr = RandomForestRegressor()
rfr.fit(x_train, y_train_1d)
rfr_y_predict = rfr.predict(x_test)

# Extremely randomized trees regression
etr = ExtraTreesRegressor()
etr.fit(x_train, y_train_1d)
# Each model must predict with itself; reusing rfr here would make all three
# sets of metrics identical.
etr_y_predict = etr.predict(x_test)

# Gradient boosting regression
gbr = GradientBoostingRegressor()
gbr.fit(x_train, y_train_1d)
gbr_y_predict = gbr.predict(x_test)

# 5 Evaluate every model against its own predictions.
# The default score and R^2 are computed on the standardized targets. MSE and
# MAE are reported in original price units, so targets and predictions are
# mapped back through ss_y first.
y_test_prices = ss_y.inverse_transform(y_test)
for label, model, y_predict in (
        ("随机森林回归", rfr, rfr_y_predict),
        ("极端随机森林回归", etr, etr_y_predict),
        ("梯度提升回归回归", gbr, gbr_y_predict),
):
    # predict() returns a 1-D array; newer scikit-learn releases validate the
    # input of inverse_transform as 2-D, so hand it a single column.
    predicted_prices = ss_y.inverse_transform(y_predict.reshape(-1, 1))
    print(label + "的默认评估值为:", model.score(x_test, y_test))
    print(label + "的R_squared值为:", r2_score(y_test, y_predict))
    print(label + "的均方误差为:", mean_squared_error(y_test_prices, predicted_prices))
    print(label + "的平均绝对误差为:", mean_absolute_error(y_test_prices, predicted_prices))
88
'''
注意: 下面这份输出记录自三组预测结果全部来自 rfr.predict 的那次运行,
因此三个模型的 R_squared、均方误差、平均绝对误差完全相同, 实际上都是随机森林的结果;
只有"默认评估值"(各模型自己的 score) 反映了每个模型各自的表现。

随机森林回归的默认评估值为: 0.8391590262557747
随机森林回归的R_squared值为: 0.8391590262557747
随机森林回归的均方误差为: 12.471817322834646
随机森林回归的平均绝对误差为: 2.4255118110236227

极端随机森林回归的默认评估值为: 0.783339502805047
极端随机森林回归的R_squared值为: 0.8391590262557747
极端随机森林回归的均方误差为: 12.471817322834646
极端随机森林回归的平均绝对误差为: 2.4255118110236227

GradientBoostingRegressor回归的默认评估值为: 0.8431187344932869
GradientBoostingRegressor回归的R_squared值为: 0.8391590262557747
GradientBoostingRegressor回归的均方误差为: 12.471817322834646
GradientBoostingRegressor回归的平均绝对误差为: 2.4255118110236227
'''