三层神经网络,训练0到9十个数字并测试:

1 import numpy
  2 import scipy.special
  3 # import matplotlib.pyplot
  4 import time
  5 
  6 
  7 class NeuralNetwork:
  8 
  9     # 初始化神经网络
 10     def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
 11         # 设置输入层、隐藏层、输出层的节点数
 12         self.inodes = inputnodes
 13         self.hnodes = hiddenodes
 14         self.onodes = outputnodes
 15 
 16         # 学习因子
 17         self.lr = learningrate
 18 
 19         # 输入层、隐藏层、输出层之间的链接权重
 20         # self.wih = (numpy.random.rand(self.hnodes, self.inodes) - 0.5)
 21         # self.who = (numpy.random.rand(self.onodes, self.inodes) - 0.5)
 22         # 利用正态分布采样权重
 23         self.wih = numpy.random.normal(0.0, pow(self.hnodes, - 0.5), (self.hnodes, self.inodes))
 24         self.who = numpy.random.normal(0.0, pow(self.onodes, - 0.5), (self.onodes, self.hnodes))
 25 
 26         # S函数
 27         self.activation_function = lambda x: scipy.special.expit(x)
 28         pass
 29 
 30     # 训练
 31     def train(self, inputs_list, targets_list):
 32         # 输入层转矩阵
 33         inputs = numpy.array(inputs_list, ndmin=2).T
 34         targets = numpy.array(targets_list, ndmin=2).T
 35 
 36         # 隐藏层输入=权重点乘输入层矩阵
 37         hidden_inputs = numpy.dot(self.wih, inputs)
 38         # 隐藏层应用S函数
 39         hidden_outputs = self.activation_function(hidden_inputs)
 40 
 41         # 输出层输入=权重点乘隐藏层输入矩阵
 42         final_inputs = numpy.dot(self.who, hidden_outputs)
 43         # 输出层输入应用S函数
 44         final_outputs = self.activation_function(final_inputs)
 45 
 46         # 计算误差
 47         output_errors = targets - final_outputs;
 48         # 计算隐藏层误差
 49         hidden_errors = numpy.dot(self.who.T, output_errors)
 50         # 更新隐藏层和输出层之间的权重
 51         self.who += self.lr * numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)),
 52                                         numpy.transpose(hidden_outputs))
 53         # 更新输入层和隐藏层之间的权重
 54         self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)),
 55                                         numpy.transpose(inputs))
 56 
 57         pass
 58 
 59     # 查询
 60     def query(self, inputs_list):
 61         # 输入层转矩阵
 62         inputs = numpy.array(inputs_list, ndmin=2).T
 63 
 64         # 隐藏层输入=权重点乘输入层矩阵
 65         hidden_inputs = numpy.dot(self.wih, inputs)
 66         # 隐藏层应用S函数
 67         hidden_outputs = self.activation_function(hidden_inputs)
 68 
 69         # 输出层输入=权重点乘隐藏层输入矩阵
 70         final_inputs = numpy.dot(self.who, hidden_outputs)
 71         # 输出层输入应用S函数
 72         final_outputs = self.activation_function(final_inputs)
 73 
 74         return final_outputs
 75 
 76 
 77 # 输入、隐藏、输出三层节点数
 78 input_nodes = 784
 79 hidden_nodes = 100
 80 output_nodes = 10
 81 
 82 # 学习因子
 83 learning_rate = 0.2
 84 
 85 # 创建神经网络
 86 n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
 87 
 88 # 读取训练数据
 89 # training_data_file = open("web/mnist_dataset/mnist_train_100.csv")
 90 training_data_file = open("web/mnist_dataset/mnist_train.csv")
 91 training_data_list = training_data_file.readlines()
 92 training_data_file.close()
 93 
 94 # 世代,所有数据训练一遍为一个世代
 95 epochs = 1
 96 start = int(time.time())
 97 for e in range(epochs):
 98     # 训练神经网络
 99     for record in training_data_list:
100         # 按逗号切分成数组
101         all_values = record.split(",")
102         # 缩放并转换成0.01到0.99之间的数组
103         inputs = (numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
104         # 构建真实输出数组,除了目标位置是0.99,其他都是0.01
105         targets = numpy.zeros(output_nodes) + 0.01
106         targets[int(all_values[0])] = 0.99
107         n.train(inputs, targets)
108         pass
109     pass
110 end = int(time.time())
111 print("训练用时=", end - start, "秒")
112 
113 # 测试数据
114 # test_data_file = open("web/mnist_dataset/mnist_test_10.csv")
115 test_data_file = open("web/mnist_dataset/mnist_test.csv")
116 test_data_list = test_data_file.readlines()
117 test_data_file.close()
118 # all_values = test_data_list[0].split(",")
119 # image_array = numpy.asfarray(all_values[1:]).reshape((28, 28))
120 # matplotlib.pyplot.imshow(image_array, cmap='Greys', interpolation='None')
121 #
122 # value = n.query((numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01)
123 # print(value)
124 
125 # 统计分数
126 scorecard = []
127 
128 start = int(time.time())
129 # 检查所有测试数据
130 for record in test_data_list:
131     all_values = record.split(",")
132     # 正确答案
133     correct_label = int(all_values[0])
134     # print(correct_label, "正确答案")
135 
136     inputs = (numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
137     outputs = n.query(inputs)
138     label = numpy.argmax(outputs)
139     # print(label, "神经网络答案")
140 
141     if label == correct_label:
142         scorecard.append(1)
143     else:
144         scorecard.append(0)
145         pass
146 end = int(time.time())
147 print("检查用时=", end - start, "秒")
148 
149 # print(scorecard)
150 scorecard_array = numpy.asarray(scorecard)
151 print("正确率=", scorecard_array.sum() / scorecard_array.size)

验证码的数字和字母识别:

1 import numpy
  2 import scipy.special
  3 import scipy.ndimage.interpolation
  4 # import matplotlib.pyplot
  5 import time
  6 import string
  7 from random import shuffle
  8 
  9 import matplotlib.pyplot
 10 
 11 
 12 
 13 class NeuralNetwork:
 14 
 15     # 初始化神经网络
 16     def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
 17         # 设置输入层、隐藏层、输出层的节点数
 18         self.inodes = inputnodes
 19         self.hnodes = hiddenodes
 20         self.onodes = outputnodes
 21 
 22         # 学习因子
 23         self.lr = learningrate
 24 
 25         # 输入层、隐藏层、输出层之间的链接权重
 26         # self.wih = (numpy.random.rand(self.hnodes, self.inodes) - 0.5)
 27         # self.who = (numpy.random.rand(self.onodes, self.inodes) - 0.5)
 28         # 利用正态分布采样权重
 29         self.wih = numpy.random.normal(0.0, pow(self.hnodes, - 0.5), (self.hnodes, self.inodes))
 30         self.who = numpy.random.normal(0.0, pow(self.onodes, - 0.5), (self.onodes, self.hnodes))
 31 
 32         # S函数
 33         self.activation_function = lambda x: scipy.special.expit(x)
 34         self.inverse_activation_function = lambda x: scipy.special.logit(x)
 35         pass
 36 
 37     # 训练
 38     def train(self, inputs_list, targets_list):
 39         # 输入层转矩阵
 40         inputs = numpy.array(inputs_list, ndmin=2).T
 41         targets = numpy.array(targets_list, ndmin=2).T
 42 
 43         # 隐藏层输入=权重点乘输入层矩阵
 44         hidden_inputs = numpy.dot(self.wih, inputs)
 45         # 隐藏层应用S函数
 46         hidden_outputs = self.activation_function(hidden_inputs)
 47 
 48         # 输出层输入=权重点乘隐藏层输入矩阵
 49         final_inputs = numpy.dot(self.who, hidden_outputs)
 50         # 输出层输入应用S函数
 51         final_outputs = self.activation_function(final_inputs)
 52 
 53         # 计算误差
 54         output_errors = targets - final_outputs
 55         # 计算隐藏层误差
 56         hidden_errors = numpy.dot(self.who.T, output_errors)
 57         # 更新隐藏层和输出层之间的权重
 58         self.who += self.lr * numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)),
 59                                         numpy.transpose(hidden_outputs))
 60         # 更新输入层和隐藏层之间的权重
 61         self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)),
 62                                         numpy.transpose(inputs))
 63 
 64         pass
 65 
 66     # 查询
 67     def query(self, inputs_list):
 68         # 输入层转矩阵
 69         inputs = numpy.array(inputs_list, ndmin=2).T
 70 
 71         # 隐藏层输入=权重点乘输入层矩阵
 72         hidden_inputs = numpy.dot(self.wih, inputs)
 73         # 隐藏层应用S函数
 74         hidden_outputs = self.activation_function(hidden_inputs)
 75 
 76         # 输出层输入=权重点乘隐藏层输入矩阵
 77         final_inputs = numpy.dot(self.who, hidden_outputs)
 78         # 输出层输入应用S函数
 79         final_outputs = self.activation_function(final_inputs)
 80 
 81         return final_outputs
 82 
 83     def back_query(self, targets_list):
 84         # transpose the targets list to a vertical array
 85         final_outputs = numpy.array(targets_list, ndmin=2).T
 86 
 87         # calculate the signal into the final output layer
 88         final_inputs = self.inverse_activation_function(final_outputs)
 89 
 90         # calculate the signal out of the hidden layer
 91         hidden_outputs = numpy.dot(self.who.T, final_inputs)
 92         # scale them back to 0.01 to .99
 93         hidden_outputs -= numpy.min(hidden_outputs)
 94         hidden_outputs /= numpy.max(hidden_outputs)
 95         hidden_outputs *= 0.98
 96         hidden_outputs += 0.01
 97 
 98         # calculate the signal into the hidden layer
 99         hidden_inputs = self.inverse_activation_function(hidden_outputs)
100 
101         # calculate the signal out of the input layer
102         inputs = numpy.dot(self.wih.T, hidden_inputs)
103         # scale them back to 0.01 to .99
104         inputs -= numpy.min(inputs)
105         inputs /= numpy.max(inputs)
106         inputs *= 0.98
107         inputs += 0.01
108 
109         return inputs
110 
111 
# rotate() is provided by scipy.ndimage itself; scipy.ndimage.interpolation is
# only a deprecated alias namespace.
import scipy.ndimage

# Character classes: the ten digits followed by the 26 lowercase letters.
codes = list(string.digits + string.ascii_lowercase)

# Each sample is ONE character crop. split_image() in the data-preparation
# script cuts crops of 100 rows x 45 columns and convert_img_to_csv() flattens
# them row by row, so a record must be reshaped to (100, 45) to get the image back.
image_shape = (100, 45)
input_nodes = 45 * 100
# One output node per character class.
output_nodes = (len(codes))
hidden_nodes = output_nodes * 10

# Learning rate
learning_rate = 0.2

# Build the network
n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# Load the training records (one CSV line per crop: label character, then pixels).
# Blank lines are dropped so a trailing newline cannot crash the label lookup.
with open("web/train.csv") as training_data_file:
    training_data_list = [line for line in training_data_file if line.strip()]
shuffle(training_data_list)

# One epoch = one full pass over the training data.
epochs = 1
print("输入节点=%d,隐藏节点=%d,输出节点=%d,学习因子=%f,时代=%d" % (input_nodes, hidden_nodes, output_nodes, learning_rate, epochs))
print("开始训练...")
start = int(time.time())
for e in range(epochs):
    for record in training_data_list:
        all_values = record.split(",")
        # Scale pixel values from 0..255 into 0.01..1.00.
        # numpy.asfarray was removed in NumPy 2.0; asarray(dtype=float) is the
        # documented replacement.
        train_inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01

        # Augment with copies rotated by +10 and -10 degrees. The rotation has
        # to act on the 2-D image: rotating a (1, 4500) array only turns a
        # single pixel row and yields almost nothing but the fill value.
        # The result is flattened again because train() expects a flat vector.
        train_image = train_inputs.reshape(image_shape)
        train_inputs_plus_10 = scipy.ndimage.rotate(train_image, 10, cval=0.01,
                                                    reshape=False).reshape(-1)
        train_inputs_minus_10 = scipy.ndimage.rotate(train_image, -10, cval=0.01,
                                                     reshape=False).reshape(-1)

        # Target vector: 0.99 at the labelled character, 0.01 everywhere else.
        train_targets = numpy.zeros(output_nodes) + 0.01
        code = all_values[0]
        train_targets[codes.index(code)] = 0.99
        n.train(train_inputs, train_targets)
        n.train(train_inputs_plus_10, train_targets)
        n.train(train_inputs_minus_10, train_targets)
end = int(time.time())
print("训练用时=", end - start, "秒")

# Load the test records.
with open("web/test.csv") as test_data_file:
    test_data_list = [line for line in test_data_file if line.strip()]

# 1 for every correctly classified record, 0 otherwise.
scorecard = []

print("开始测试...")
start = int(time.time())
for record in test_data_list:
    all_values = record.split(",")
    # Expected answer is the first field (a single character).
    correct_label = (all_values[0])

    verify_inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    verify_outputs = n.query(verify_inputs)
    # The strongest output node selects the predicted character.
    label = codes[numpy.argmax(verify_outputs)]

    scorecard.append(1 if label == correct_label else 0)
end = int(time.time())
print("检查用时=", end - start, "秒")

# Accuracy = fraction of records answered correctly.
scorecard_array = numpy.asarray(scorecard)
print("正确率=", scorecard_array.sum() / scorecard_array.size)

# Run the network backwards for class index 0 to see what input it associates
# with that class.
label = 0
targets = numpy.zeros(output_nodes) + 0.01
targets[label] = 0.99

image_data = n.back_query(targets)

# Plot with the same (rows, columns) layout the crops were flattened from;
# reshaping to (45, 100) would scramble the rows.
# NOTE(review): no matplotlib.pyplot.show() follows, so nothing is displayed
# when this runs as a plain script -- confirm whether that is intended.
matplotlib.pyplot.imshow(image_data.reshape(image_shape), cmap='Greys', interpolation='None')

制作训练和测试数据:

1 import csv
  2 # import matplotlib.pyplot
  3 import os
  4 import cv2
  5 
  6 import numpy
  7 import scipy.special
  8 import string
  9 
 10 
 11 class NeuralNetwork:
 12 
 13     # 初始化神经网络
 14     def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
 15         # 设置输入层、隐藏层、输出层的节点数
 16         self.inodes = inputnodes
 17         self.hnodes = hiddenodes
 18         self.onodes = outputnodes
 19 
 20         # 学习因子
 21         self.lr = learningrate
 22 
 23         # 输入层、隐藏层、输出层之间的链接权重
 24         # self.wih = (numpy.random.rand(self.hnodes, self.inodes) - 0.5)
 25         # self.who = (numpy.random.rand(self.onodes, self.inodes) - 0.5)
 26         # 利用正态分布采样权重
 27         self.wih = numpy.random.normal(0.0, pow(self.hnodes, - 0.5), (self.hnodes, self.inodes))
 28         self.who = numpy.random.normal(0.0, pow(self.onodes, - 0.5), (self.onodes, self.hnodes))
 29 
 30         # S函数
 31         self.activation_function = lambda x: scipy.special.expit(x)
 32         pass
 33 
 34     # 训练
 35     def train(self, inputs_list, targets_list):
 36         # 输入层转矩阵
 37         inputs = numpy.array(inputs_list, ndmin=2).T
 38         targets = numpy.array(targets_list, ndmin=2).T
 39 
 40         # 隐藏层输入=权重点乘输入层矩阵
 41         hidden_inputs = numpy.dot(self.wih, inputs)
 42         # 隐藏层应用S函数
 43         hidden_outputs = self.activation_function(hidden_inputs)
 44 
 45         # 输出层输入=权重点乘隐藏层输入矩阵
 46         final_inputs = numpy.dot(self.who, hidden_outputs)
 47         # 输出层输入应用S函数
 48         final_outputs = self.activation_function(final_inputs)
 49 
 50         # 计算误差
 51         output_errors = targets - final_outputs;
 52         # 计算隐藏层误差
 53         hidden_errors = numpy.dot(self.who.T, output_errors)
 54         # 更新隐藏层和输出层之间的权重
 55         self.who += self.lr * numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)),
 56                                         numpy.transpose(hidden_outputs))
 57         # 更新输入层和隐藏层之间的权重
 58         self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)),
 59                                         numpy.transpose(inputs))
 60 
 61         pass
 62 
 63     # 查询
 64     def query(self, inputs_list):
 65         # 输入层转矩阵
 66         inputs = numpy.array(inputs_list, ndmin=2).T
 67 
 68         # 隐藏层输入=权重点乘输入层矩阵
 69         hidden_inputs = numpy.dot(self.wih, inputs)
 70         # 隐藏层应用S函数
 71         hidden_outputs = self.activation_function(hidden_inputs)
 72 
 73         # 输出层输入=权重点乘隐藏层输入矩阵
 74         final_inputs = numpy.dot(self.who, hidden_outputs)
 75         # 输出层输入应用S函数
 76         final_outputs = self.activation_function(final_inputs)
 77 
 78         return final_outputs
 79 
 80 
 81 # 读取训练数据
 82 def convert_img_to_csv(img_dir, csv_file):
 83     # 设置需要保存的csv路径
 84     with open(r"web/" + csv_file + ".csv", "w", newline="") as f:
 85         # 设置csv文件的列名
 86         # column_name = ["label"]
 87         # column_name.extend(["pixel%d" % i for i in range(32 * 32)])
 88         # 将列名写入到csv文件中
 89         writer = csv.writer(f)
 90         # writer.writerow(column_name)
 91         # 该目录下有9个目录,目录名从0-9
 92         # for i in range(1):
 93         # 获取目录的路径
 94         # img_temp_dir = os.path.join(img_dir, str(i))
 95         # 获取该目录下所有的文件
 96         img_list = os.listdir(img_dir)
 97         # 遍历所有的文件名称
 98         for img_name in img_list:
 99             # 判断文件是否为目录,如果为目录则不处理
100             if not os.path.isdir(img_name):
101                 # 获取图片的路径
102                 img_path = os.path.join(img_dir, img_name)
103                 # 因为图片是黑白的,所以以灰色读取图片
104                 img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
105                 # 图片标签
106                 row_data = [img_name[0]]
107                 # 获取图片的像素
108                 ary = 255.0 - img.flatten()
109                 row_data.extend(ary)
110                 # 将图片数据写入到csv文件中
111                 writer.writerow(row_data)
112 
113 
def split_image(img_dir, save_path):
    """Cut every captcha image in ``img_dir`` into four single-character crops.

    Each source image is read as grayscale and sliced into four adjacent crops
    of 100 rows x 45 columns. The last four characters of the file name
    (extension removed) are taken as the labels of the four crops, left to
    right. Crops are written to ``save_path`` as ``<label>_<seq>.jpg``, where
    ``seq`` is a counter that keeps increasing across all source images.

    Sub-directories, files OpenCV cannot decode, and files whose name has
    fewer than four characters before the extension are skipped.

    Args:
        img_dir: directory containing the source captcha images.
        save_path: directory the crops are written to (with or without a
            trailing separator).
    """
    pieces = 4
    piece_width = 45
    piece_height = 100
    image_save_path_tail = ".jpg"

    seq = 1
    for img_name in os.listdir(img_dir):
        img_path = os.path.join(img_dir, img_name)
        # The directory test must use the joined path: a bare name is resolved
        # against the working directory, not against img_dir.
        if os.path.isdir(img_path):
            continue

        # Labels come from the name without its extension, so the extension
        # length no longer matters (the original indexing assumed 4 characters).
        stem = os.path.splitext(img_name)[0]
        if len(stem) < pieces:
            continue

        src_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if src_img is None:
            # Not a readable image.
            continue

        for i in range(pieces):
            img_roi = src_img[0:piece_height, (i * piece_width):((i + 1) * piece_width)]
            file_name = "%s%s%d%s" % (stem[-pieces + i], "_", seq, image_save_path_tail)
            cv2.imwrite(os.path.join(save_path, file_name), img_roi)
            seq = seq + 1
136 
137 
if __name__ == "__main__":
    # Earlier runs, kept for reference:
    #   convert_img_to_csv("web/unknownCode", "train-single")
    #   convert_img_to_csv("web/train-single", "train")
    #   convert_img_to_csv("web/test-single", "test")
    #
    # Building the training set (one captcha image is cut into four crops):
    #   split_image("web/train/", "web/train-single/")
    #   convert_img_to_csv("web/train-single", "train")

    # Build the test set: cut the captchas into crops, then dump the crops to CSV.
    captcha_dir, crop_dir = "web/unknownCode/", "web/test-single/"
    split_image(captcha_dir, crop_dir)
    convert_img_to_csv("web/test-single", "test")