最近在忙着写论文,开题答辩也已经顺利地通过了,接下来我打算为读博做准备。
–小记
最近在写一篇论文,其中我打算使用遗传算法加上神经网络来优化数据,但是我的数据有点少,于是我就在 MNIST 数据集上找了一份数据。其实也不是我找的,主要是我找了一个源码,然后我在他的程序上做了修改,这应该不算抄袭吧?罪过,罪过。
话不多说上程序
GA.py
from numpy import *
class GA:
    """Genetic-algorithm helper that mutates and crosses over the weight
    matrices of a feed-forward neural network."""

    def __init__(self, sizes=(2, 3, 1)):
        """
        :param sizes: neurons per layer, e.g. (2, 3, 1).

        Note: a tuple default replaces the original mutable default
        ``sizes=[2, 3, 1]`` (shared-mutable-default pitfall).
        """
        self.sizes = sizes
        # One weight matrix per adjacent layer pair, sampled from N(0, 1).
        self.weights = [random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        self.a = []            # scratch list of (row, col) mutation positions
        self.totalWeight = []  # population: snapshots of candidate weight sets
        self.accuracy = [0, 0]  # fitness per candidate (seeded with two zeros)

    def newWeight(self):
        """Re-sample a fresh random weight set (a new individual) and print it."""
        self.weights = [random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]
        print(self.weights)

    def variation(self):
        """Mutation: zero one randomly chosen entry of every weight matrix,
        then store a snapshot of the mutated weight set in the population.
        """
        layers = len(self.sizes)
        for i in range(layers - 1):
            # numpy randint's upper bound is exclusive, so these are valid indices.
            self.a.append([random.randint(0, self.sizes[i + 1]),
                           random.randint(0, self.sizes[i])])
        for j in range(layers - 1):
            self.weights[j][self.a[j][0]][self.a[j][1]] = 0
        # BUGFIX: snapshot copies of the matrices; the original appended a
        # reference to self.weights, so later in-place mutations corrupted
        # every previously stored population entry.
        self.totalWeight.append([w.copy() for w in self.weights])
        self.a = []

    def cross(self, newAccuracy=None):
        """Crossover: pick the two fittest stored candidates and rebuild
        ``self.weights`` by taking layers alternately from each parent.

        :param newAccuracy: kept for interface compatibility; as in the
            original, the fitness list actually used is ``self.accuracy``.
        """
        # BUGFIX: operate on a copy so the recorded fitness history survives
        # (the original zeroed the max entry of self.accuracy in place, and
        # its mutable default argument [1, 2] was never actually used).
        scores = list(self.accuracy)
        best = scores.index(max(scores))
        scores[best] = 0
        second = scores.index(max(scores))
        layers = len(self.sizes)
        # BUGFIX: range(layers - 1) covers every weight matrix (the original
        # range(layers - 2) skipped the last one), and ``i % 2`` alternates
        # parents (the original ``i / 2 == 0`` was only true for i == 0).
        # The original also had two no-op statements (bare subscripts) here.
        for i in range(layers - 1):
            parent = self.totalWeight[best] if i % 2 == 0 else self.totalWeight[second]
            self.weights[i] = parent[i]
        # NOTE(review): indices into self.accuracy are assumed to line up with
        # indices into self.totalWeight — callers must keep them in sync.
if __name__ == "__main__":
    # Smoke test: mutate, re-sample, mutate again, then cross over.
    demo = GA()
    print(type(demo.weights))
    demo.variation()
    demo.newWeight()
    demo.variation()
    demo.cross()
    print(demo.weights)
接下来是神经网络的程序
NN.py
# coding=UTF-8
import numpy as np
import random
from GA import *
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^-x); works element-wise on numpy arrays."""
    return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
    """Derivative of the logistic sigmoid: s(x) * (1 - s(x))."""
    value = 1 / (1 + np.exp(-x))
    return value * (1 - value)
class Network:
    """Feed-forward neural network trained with mini-batch SGD; at the start
    of training (and each epoch) its weights are replaced by the GA helper's
    crossover output, so gradient descent and the GA alternate.
    """
    def __init__(self, sizes):
        """
        :param sizes: number of neurons in each layer, e.g.
            Network([2, 3, 1]) -> input layer: 2, hidden: 3, output: 1.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One bias column vector per non-input layer, drawn from N(0, 1).
        self.biases = [np.random.randn(y,1) for y in sizes[1:]]
        # One weight matrix per adjacent layer pair, drawn from N(0, 1).
        # zip pairs consecutive layer sizes: (x, y) = (fan-in, fan-out).
        self.weights = [np.random.randn(y,x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
    def feedforward(self, a):
        """Return the output of the network if 'a' is input.

        :param a: input column vector (presumably shape (sizes[0], 1) — TODO confirm)
        :return: activation of the output layer
        """
        for b,w in zip(self.biases,self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    def SGD(self, training_data, epochs, mini_batch_size,eta,test_data=None):
        """Stochastic gradient descent, hybridized with the genetic algorithm.

        :param training_data: training set, a list of pairs (x, y)
        :param epochs: number of training epochs
        :param mini_batch_size: size of each mini-batch
        :param eta: learning rate
        :param test_data: optional test set; when given, accuracy is evaluated
            and fed back to the GA after every epoch
        :return: None
        """
        # Seed the weights via the GA: mutate, re-sample, mutate again,
        # then cross over the two fittest candidates.
        ga = GA(self.sizes)
        ga.variation()
        ga.newWeight()
        ga.variation()
        ga.cross()
        self.weights = ga.weights
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            # NOTE(review): re-assigning ga.weights here discards the gradient
            # updates from the previous epoch — appears intentional for this
            # GA/NN hybrid, but worth confirming.
            ga.variation()
            ga.cross()
            self.weights = ga.weights
            random.shuffle(training_data)  # shuffle so mini-batches are random samples
            mini_batches=[
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)
            ]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch,eta)
            if test_data:
                # Record this epoch's accuracy and weights as GA fitness/population.
                ga.accuracy.append(self.evaluate(test_data)/n_test)
                ga.totalWeight.append(self.weights)
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data),n_test))
            else:
                print("Epoch {0} complete".format(j))
    def update_mini_batch(self, mini_batch, eta):
        """Update weights and biases from one mini-batch via backpropagation.

        :param mini_batch: a list of tuples (x, y), e.g. 100 images
        :param eta: learning rate
        :return: None
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]   # accumulated bias gradients
        nabla_w = [np.zeros(w.shape) for w in self.weights]  # accumulated weight gradients
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x,y)  # per-sample gradients
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b,delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # Average the gradients over the mini-batch and take one step.
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]
    def evaluate(self, test_data):
        """Count correctly classified test samples.

        :param test_data: list of (x, y); y is assumed to be the integer
            class label (argmax is compared to it directly) — TODO confirm
        :return: number of samples whose argmax output equals y
        """
        test_results = [(np.argmax(self.feedforward(x)),y)
                        for (x, y) in test_data ]
        return sum(int (x == y) for (x,y) in test_results)
    def backprop(self, x, y):
        """Backpropagation for a single training sample.

        :param x: input vector (784-dimensional for MNIST)
        :param y: target vector (10-dimensional for MNIST)
        :return: tuple (nabla_b, nabla_w) of per-layer gradients
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass: store every z = w·a + b and every activation.
        activation = x      # output of the input layer
        activations = [x]   # activations of every layer
        zs = []             # z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w,activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Backward pass: error of the output layer.
        delta = self.cost_derivative(activations[-1],y)*sigmoid_derivative(zs[-1])
        nabla_b[-1] = delta  # cost gradient w.r.t. the last-layer biases
        nabla_w[-1] = np.dot(delta,activations[-2].transpose())  # ... w.r.t. the last-layer weights
        # l counts backwards from the output layer, propagating the error.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_derivative(z)
            delta = np.dot(self.weights[-l+1].transpose(),delta)*sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)
    def cost_derivative(self, output_activation, y):
        """Derivative of the quadratic cost w.r.t. the output activation.

        :param output_activation: network output
        :param y: target
        :return: output_activation - y
        """
        return (output_activation - y)
class EntropyCost(object):
    """Cross-entropy cost function."""

    @staticmethod
    def fn(a, y):
        """Cost for output ``a`` and target ``y``; nan_to_num guards the
        0 * log(0) case when the output saturates at 0 or 1."""
        return np.sum(np.nan_to_num(-(y * np.log(a) + (1 - y) * np.log(1 - a))))

    @staticmethod
    def delta(z, a, y):
        """Output-layer error; the sigmoid-prime factor cancels for this cost."""
        return a - y
class QuadraicCost(object):
    """Quadratic (squared-error) cost function."""

    @staticmethod
    def fn(a, y):
        """Cost value: 0.5 * ||a - y||^2."""
        diff = a - y
        return 0.5 * np.linalg.norm(diff) ** 2

    @staticmethod
    def delta(z, a, y):
        """Output-layer error: (a - y) scaled by the sigmoid derivative at z."""
        return (a - y) * sigmoid_derivative(z)
if __name__ == "__main__":
    # Smoke test: build a tiny 2-3-1 network and show its parameters.
    demo = Network([2,3,1])
    print('num_layers:'+str(demo.num_layers))
    print('sizes:'+str(demo.sizes))
    print("weight"+str(demo.weights))
接下来是测试用的
这里我就不公开我的论文数据了,因为这个项目还没有结,而且是市政府的项目,我怕导师有意见。所以这里我采用Mnist的数据集
import mnist_loader
import time
from NN import *
if __name__ == "__main__":
    # BUGFIX: time.clock() was removed in Python 3.8; time.perf_counter()
    # is the documented replacement for wall-clock timing.
    start = time.perf_counter()
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    # Materialize the loader's generators so len() and indexing work below.
    training_data, validation_data, test_data = list(training_data), list(validation_data), list(test_data)
    print('training data')
    print(type(training_data))
    print(training_data[0][0].shape)
    print(training_data[0])
    print("validation data")
    print(len(validation_data))
    print("test data")
    print(len(test_data))
    print("-------------------------------")
    # 784 input pixels, 30 hidden units, 10 output classes (MNIST digits).
    net = Network([784, 30,10])
    # 30 epochs, mini-batch size 10, learning rate 3.
    net.SGD(training_data, 30, 10, 3, test_data=test_data)
上图片的目的是为了证明我的程序是可以运行的
我的准确率没那么高,参数还在优化。