三层神经网络,训练0到9十个数字并测试:
1 import numpy
2 import scipy.special
3 # import matplotlib.pyplot
4 import time
5
6
class NeuralNetwork:
    """Three-layer (input, hidden, output) feed-forward network with sigmoid units."""

    def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
        """Record the layer sizes and learning rate, then draw random weights."""
        self.inodes, self.hnodes, self.onodes = inputnodes, hiddenodes, outputnodes
        self.lr = learningrate

        # Link weights are sampled from a zero-mean normal distribution.
        # NOTE(review): the standard deviation is derived from the size of the
        # layer the links feed INTO (hnodes / onodes), not from the number of
        # incoming links - confirm this is the intended heuristic.
        hidden_spread = self.hnodes ** -0.5
        output_spread = self.onodes ** -0.5
        self.wih = numpy.random.normal(0.0, hidden_spread, (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, output_spread, (self.onodes, self.hnodes))

        # Sigmoid activation.
        self.activation_function = lambda x: scipy.special.expit(x)

    def _feed_forward(self, inputs):
        """Return (hidden_outputs, final_outputs) for a column vector of inputs."""
        hidden_outputs = self.activation_function(self.wih.dot(inputs))
        final_outputs = self.activation_function(self.who.dot(hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Run one gradient-descent step on a single (inputs, targets) sample."""
        # Turn both sequences into column vectors.
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._feed_forward(inputs)

        # Output error, and the error propagated back to the hidden layer.
        # The hidden error must be taken before `who` is modified below.
        output_errors = targets - final_outputs
        hidden_errors = self.who.T.dot(output_errors)

        # Error scaled by the sigmoid slope of each layer.
        output_delta = output_errors * final_outputs * (1.0 - final_outputs)
        hidden_delta = hidden_errors * hidden_outputs * (1.0 - hidden_outputs)

        # Adjust hidden->output weights first, then input->hidden weights.
        self.who += self.lr * output_delta.dot(hidden_outputs.T)
        self.wih += self.lr * hidden_delta.dot(inputs.T)

    def query(self, inputs_list):
        """Return the output-layer activations (column vector) for the inputs."""
        inputs = numpy.array(inputs_list, ndmin=2).T
        _, final_outputs = self._feed_forward(inputs)
        return final_outputs
75
76
# Node counts for the input, hidden and output layers.
input_nodes = 784
hidden_nodes = 100
output_nodes = 10

# Learning rate.
learning_rate = 0.2

# Build the network.
n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# Load the training records; each line is "label,pixel,pixel,...".
# The context manager guarantees the file is closed even if reading fails.
with open("web/mnist_dataset/mnist_train.csv") as training_data_file:
    training_data_list = training_data_file.readlines()

# One epoch is one full pass over the training records.
epochs = 1
start = int(time.time())
for e in range(epochs):
    for record in training_data_list:
        # A blank (e.g. trailing) line has no label and would crash int().
        if not record.strip():
            continue
        all_values = record.split(",")
        # Scale the raw 0..255 pixels into 0.01..1.00.
        # numpy.asfarray was removed in NumPy 2.0; asarray(dtype=float) is equivalent.
        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        # Target vector: 0.99 at the label's index, 0.01 everywhere else.
        targets = numpy.zeros(output_nodes) + 0.01
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)
end = int(time.time())
print("训练用时=", end - start, "秒")

# Load the test records (same line format as the training file).
with open("web/mnist_dataset/mnist_test.csv") as test_data_file:
    test_data_list = test_data_file.readlines()

# One entry per test record: 1 for a correct prediction, 0 otherwise.
scorecard = []

start = int(time.time())
for record in test_data_list:
    if not record.strip():
        continue
    all_values = record.split(",")
    # Expected answer.
    correct_label = int(all_values[0])

    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    outputs = n.query(inputs)
    # The index of the strongest output is the network's answer.
    label = numpy.argmax(outputs)

    scorecard.append(1 if label == correct_label else 0)
end = int(time.time())
print("检查用时=", end - start, "秒")

scorecard_array = numpy.asarray(scorecard)
print("正确率=", scorecard_array.sum() / scorecard_array.size)
验证码的数字和字母识别:
1 import numpy
2 import scipy.special
3 import scipy.ndimage.interpolation
4 # import matplotlib.pyplot
5 import time
6 import string
7 from random import shuffle
8
9 import matplotlib.pyplot
10
11
12
class NeuralNetwork:
    """Three-layer sigmoid network that can also be run backwards (back_query)."""

    def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
        """Keep the layer sizes and learning rate and create random weights."""
        self.inodes = inputnodes
        self.hnodes = hiddenodes
        self.onodes = outputnodes
        self.lr = learningrate

        # Normally distributed link weights centred on zero.
        # NOTE(review): the spread uses the receiving layer's node count
        # (hnodes / onodes) rather than the number of incoming links - confirm.
        wih_shape = (self.hnodes, self.inodes)
        who_shape = (self.onodes, self.hnodes)
        self.wih = numpy.random.normal(0.0, self.hnodes ** -0.5, wih_shape)
        self.who = numpy.random.normal(0.0, self.onodes ** -0.5, who_shape)

        # Sigmoid and its inverse (logit).
        self.activation_function = lambda x: scipy.special.expit(x)
        self.inverse_activation_function = lambda x: scipy.special.logit(x)

    def _propagate(self, column):
        """Push a column vector through both layers; return (hidden, final)."""
        hidden = self.activation_function(self.wih.dot(column))
        final = self.activation_function(self.who.dot(hidden))
        return hidden, final

    @staticmethod
    def _rescale(signal):
        """Shift the minimum to zero, divide by the new maximum, map to 0.01..0.99."""
        shifted = signal - numpy.min(signal)
        return shifted / numpy.max(shifted) * 0.98 + 0.01

    def train(self, inputs_list, targets_list):
        """Apply one weight update for a single training sample."""
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._propagate(inputs)

        # Errors at the output, then split back across the hidden nodes using
        # the weights as they were before this update.
        output_errors = targets - final_outputs
        hidden_errors = self.who.T.dot(output_errors)

        who_step = (output_errors * final_outputs * (1.0 - final_outputs)).dot(hidden_outputs.T)
        wih_step = (hidden_errors * hidden_outputs * (1.0 - hidden_outputs)).dot(inputs.T)

        self.who += self.lr * who_step
        self.wih += self.lr * wih_step

    def query(self, inputs_list):
        """Return the output activations (column vector) for the given inputs."""
        column = numpy.array(inputs_list, ndmin=2).T
        return self._propagate(column)[1]

    def back_query(self, targets_list):
        """Run the network in reverse: from output activations to an input pattern."""
        # Output activations as a column vector, undone through the logit.
        final_outputs = numpy.array(targets_list, ndmin=2).T
        final_inputs = self.inverse_activation_function(final_outputs)

        # Signal leaving the hidden layer, squeezed into the logit's valid range.
        hidden_outputs = self._rescale(self.who.T.dot(final_inputs))
        hidden_inputs = self.inverse_activation_function(hidden_outputs)

        # Signal leaving the input layer, rescaled the same way.
        return self._rescale(self.wih.T.dot(hidden_inputs))
110
111
# Recognisable characters: the 10 digits followed by the 26 lowercase letters.
codes = list(string.digits + string.ascii_lowercase)

# Each sample is one single-character image stored row by row:
# 100 rows x 45 columns = 4500 pixels.
image_shape = (100, 45)
input_nodes = image_shape[0] * image_shape[1]
# One output node per recognisable character.
output_nodes = (len(codes))
hidden_nodes = output_nodes * 10

# Learning rate.
learning_rate = 0.2

# Build the network.
n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# Load and shuffle the training records; each line is "char,pixel,pixel,...".
with open("web/train.csv") as training_data_file:
    training_data_list = training_data_file.readlines()
shuffle(training_data_list)

# One epoch is one full pass over the training records.
epochs = 1
print("输入节点=%d,隐藏节点=%d,输出节点=%d,学习因子=%f,时代=%d" % (input_nodes, hidden_nodes, output_nodes, learning_rate, epochs))
print("开始训练...")
start = int(time.time())
for e in range(epochs):
    for record in training_data_list:
        # A blank (e.g. trailing) line carries no label; skip it.
        if not record.strip():
            continue
        all_values = record.split(",")
        # Scale the raw 0..255 pixels into 0.01..1.00.
        # numpy.asfarray was removed in NumPy 2.0; asarray(dtype=float) is equivalent.
        train_inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01

        # Augment with copies rotated by +10 and -10 degrees. The rotation has
        # to act on the 2-D image: rotating a (1, 4500) strip pushes almost
        # every pixel out of the frame and yields a nearly blank sample.
        train_image = train_inputs.reshape(image_shape)
        train_inputs_plus_10 = scipy.ndimage.rotate(
            train_image, 10, cval=0.01, reshape=False).reshape(-1)
        train_inputs_minus_10 = scipy.ndimage.rotate(
            train_image, -10, cval=0.01, reshape=False).reshape(-1)

        # Target vector: 0.99 at the character's index, 0.01 everywhere else.
        train_targets = numpy.zeros(output_nodes) + 0.01
        code = all_values[0]
        train_targets[codes.index(code)] = 0.99
        n.train(train_inputs, train_targets)
        n.train(train_inputs_plus_10, train_targets)
        n.train(train_inputs_minus_10, train_targets)
end = int(time.time())
print("训练用时=", end - start, "秒")

# Load the test records (same line format as the training file).
with open("web/test.csv") as test_data_file:
    test_data_list = test_data_file.readlines()

# One entry per test record: 1 for a correct prediction, 0 otherwise.
scorecard = []

print("开始测试...")
start = int(time.time())
for record in test_data_list:
    if not record.strip():
        continue
    all_values = record.split(",")
    # Expected answer (a single character).
    correct_label = (all_values[0])

    verify_inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    verify_outputs = n.query(verify_inputs)
    # The strongest output node selects the predicted character.
    label = codes[numpy.argmax(verify_outputs)]

    scorecard.append(1 if label == correct_label else 0)
end = int(time.time())
print("检查用时=", end - start, "秒")

scorecard_array = numpy.asarray(scorecard)
print("正确率=", scorecard_array.sum() / scorecard_array.size)

# Run the network backwards for one output node to see the input pattern it
# associates with that character.
label = 0
# Output signals for this label: 0.99 at its index, 0.01 elsewhere.
targets = numpy.zeros(output_nodes) + 0.01
targets[label] = 0.99

# Reconstructed input pattern.
image_data = n.back_query(targets)

# Plot it with the same row/column layout the samples were flattened from.
# NOTE(review): outside an interactive backend nothing is displayed until
# matplotlib.pyplot.show() is called.
matplotlib.pyplot.imshow(image_data.reshape(image_shape), cmap='Greys', interpolation='None')
制作训练和测试数据:
1 import csv
2 # import matplotlib.pyplot
3 import os
4 import cv2
5
6 import numpy
7 import scipy.special
8 import string
9
10
class NeuralNetwork:
    """Feed-forward network with one hidden layer and sigmoid activations."""

    def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
        """Remember the layer sizes and learning rate; initialise the weights randomly."""
        self.inodes, self.hnodes, self.onodes = inputnodes, hiddenodes, outputnodes
        self.lr = learningrate

        # Zero-mean, normally distributed link weights.
        # NOTE(review): the standard deviation is based on the receiving
        # layer's node count (hnodes / onodes) - confirm this is intended.
        self.wih = numpy.random.normal(
            0.0, self.hnodes ** -0.5, (self.hnodes, self.inodes))
        self.who = numpy.random.normal(
            0.0, self.onodes ** -0.5, (self.onodes, self.hnodes))

        # Sigmoid activation.
        self.activation_function = lambda x: scipy.special.expit(x)

    def _layer_outputs(self, inputs):
        """Return the hidden-layer and output-layer activations for `inputs`."""
        hidden_outputs = self.activation_function(numpy.dot(self.wih, inputs))
        final_outputs = self.activation_function(numpy.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Update both weight matrices from one training sample."""
        # Column-vector forms of the sample.
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._layer_outputs(inputs)

        # Output-layer error and its share attributed to each hidden node
        # (computed with the weights as they stand before the update).
        output_errors = targets - final_outputs
        hidden_errors = numpy.dot(self.who.T, output_errors)

        output_gradient = output_errors * final_outputs * (1.0 - final_outputs)
        hidden_gradient = hidden_errors * hidden_outputs * (1.0 - hidden_outputs)

        # Hidden->output weights, then input->hidden weights.
        self.who += self.lr * numpy.dot(output_gradient, hidden_outputs.T)
        self.wih += self.lr * numpy.dot(hidden_gradient, inputs.T)

    def query(self, inputs_list):
        """Return the output-layer activations as a column vector."""
        inputs = numpy.array(inputs_list, ndmin=2).T
        return self._layer_outputs(inputs)[1]
79
80
def convert_img_to_csv(img_dir, csv_file):
    """Write every image in `img_dir` as one row of "web/<csv_file>.csv".

    Each row is the first character of the file name (used as the label)
    followed by the inverted grayscale pixels (255 - value) of the image,
    flattened row by row. No header row is written.

    Sub-directories and files that OpenCV cannot decode are skipped.

    Args:
        img_dir: Directory containing the image files.
        csv_file: Base name (without extension) of the CSV created under "web/".
    """
    with open(r"web/" + csv_file + ".csv", "w", newline="") as f:
        writer = csv.writer(f)
        for img_name in os.listdir(img_dir):
            img_path = os.path.join(img_dir, img_name)
            # Test the full path: the bare name would be resolved against the
            # current working directory instead of img_dir.
            if os.path.isdir(img_path):
                continue
            # Read as grayscale; cv2.imread returns None when it cannot decode.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print("skipping unreadable image:", img_path)
                continue
            # Label first, then the inverted pixel values.
            row_data = [img_name[0]]
            row_data.extend(255.0 - img.flatten())
            writer.writerow(row_data)
112
113
def split_image(img_dir, save_path, pieces=4, piece_width=45, piece_height=100):
    """Cut every image in `img_dir` into side-by-side single-character tiles.

    The last `pieces` characters of the file name (extension excluded) are
    taken as the labels of the tiles, left to right. Tile `i` covers rows
    0..piece_height and columns i*piece_width..(i+1)*piece_width of the
    grayscale image and is saved as "<label>_<seq>.jpg" inside `save_path`,
    where `seq` is a counter that runs across all tiles written by this call.

    Sub-directories, files OpenCV cannot decode, and files whose name is too
    short to supply a label for each tile are skipped.

    Args:
        img_dir: Directory containing the source images.
        save_path: Directory that receives the tiles.
        pieces: Number of tiles per image.
        piece_width: Tile width in pixels.
        piece_height: Tile height in pixels.
    """
    image_save_path_tail = ".jpg"
    seq = 1
    for img_name in os.listdir(img_dir):
        img_path = os.path.join(img_dir, img_name)
        # Test the full path: the bare name would be resolved against the
        # current working directory instead of img_dir.
        if os.path.isdir(img_path):
            continue
        # Read as grayscale; cv2.imread returns None when it cannot decode.
        src_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if src_img is None:
            print("skipping unreadable image:", img_path)
            continue
        # Labels come from the end of the name, independent of extension length.
        labels = os.path.splitext(img_name)[0][-pieces:]
        if len(labels) < pieces:
            print("skipping image without enough label characters:", img_path)
            continue
        for i, label in enumerate(labels):
            img_roi = src_img[0:piece_height, (i * piece_width):((i + 1) * piece_width)]
            tile_name = "%s%s%d%s" % (label, "_", seq, image_save_path_tail)
            cv2.imwrite(os.path.join(save_path, tile_name), img_roi)
            seq = seq + 1
136
137
if __name__ == "__main__":
    # Build the test set: cut each image under web/unknownCode/ into
    # single-character tiles, then export those tiles to web/test.csv.
    # (The training set is produced the same way, going from web/train/
    # through web/train-single/ to the "train" CSV.)
    split_image("web/unknownCode/", "web/test-single/")
    convert_img_to_csv("web/test-single", "test")