前言
本文主要内容是BP神经网络的Python实现(借助tensorflow库)和C++实现(未借助相关库)
Python实现BP神经网络
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow C++ logs; only errors are shown
from plugin import *  # expected to provide np, tf and add_layer -- TODO confirm

# The script uses the TF1 graph API (placeholders/sessions). Under
# TensorFlow 2 the v1 behaviour must be enabled explicitly, otherwise
# tf.compat.v1.placeholder raises at runtime.
tf.compat.v1.disable_eager_execution()

# Build training data: x in [-1, 1) with step 0.01, shaped (200, 1);
# target y = 0.5 * (sin(x) + noise), Gaussian noise with sigma = 0.05.
x_data = np.arange(-1, 1, 0.01)[:, np.newaxis]
print("x_data", x_data)
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = 0.5 * (np.sin(x_data) + noise)
print("y_data", y_data)

# Placeholders for input and target batches (any batch size, 1 feature).
xs = tf.compat.v1.placeholder(tf.float32, [None, 1])
ys = tf.compat.v1.placeholder(tf.float32, [None, 1])

# 1 -> 20 -> 1 network: one hidden ReLU layer, then an output layer
# with add_layer's default (presumably linear) activation.
h1 = add_layer(xs, 1, 20, tf.nn.relu)
prediction = add_layer(h1, 20, 1)

# Mean over the batch of the per-sample sum of squared errors.
# `axis` replaces the deprecated `reduction_indices` keyword.
loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), axis=[1]))
# FIX: the original mixed TF1-only names (tf.train.GradientDescentOptimizer,
# tf.global_variables_initializer) with tf.compat.v1 calls; use compat.v1
# consistently so the script also runs against TensorFlow 2.
train_step = tf.compat.v1.train.GradientDescentOptimizer(0.1).minimize(loss)
init = tf.compat.v1.global_variables_initializer()  # initialise all variables
sess = tf.compat.v1.Session()

if __name__ == "__main__":
    sess.run(init)
    for i in range(10000):
        sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
        if i % 100 == 0:
            print(sess.run(loss, feed_dict={xs: x_data, ys: y_data}))
C++实现BP神经网络
BP(Back-Propagating)神经网络的特点是向前传送输入数据,向后反馈误差。理论上,一个三层(输入层,隐含层,输出层)的BP神经网络可以实现任意m维到任意n维的映射。
查看相应代码可访问我的github(https://github.com/YuruTu),博客主要起介绍性的作用,最新代码会在github上更新
https://github.com/YuruTu/BPNN/blob/master 头文件pch.h
#ifndef PCH_H
#define PCH_H
// Precompiled header: shared standard-library includes for the BP network.
#include <cmath>     // exp, sqrt, atan, sin
#include <cstdio>    // printf -- used by train_bp/use_bp; previously reached only via transitive includes
#include <cstdlib>   // rand -- used by rand_normal; previously reached only via transitive includes
#include <iostream>
#include <vector>
#include <string.h>
#include <ctime>
#include <random>
#endif // PCH_H
主函数main.cpp
// Author: cclplus
// First draft: 2018/05/01
// Original note: Alipay donation account 707101557@qq.com
#include "pch.h"
using namespace std;
// pi via 4*atan(1): correctly rounded double value of the constant.
const double pi = atan(1.0) * 4;
// BP (back-propagation) neural network with a single hidden layer.
struct BPNN {
    int sample_count;   // number of training samples
    int input_count;    // input vector dimensionality
    int output_count;   // output vector dimensionality
    int hidden_count;   // number of hidden-layer neurons actually used
    double study_rate;  // learning rate
    double precision;   // training stops once the epoch error drops below this
    int loop_count;     // maximum number of training epochs
    vector<vector<double>> v; // hidden-layer weight matrix, input_count rows x hidden_count cols
    vector<vector<double>> w; // output-layer weight matrix, hidden_count rows x output_count cols
};
BPNN CREATE_BPNN(int sc, int ic, int oc, int hc, double sr, double p, int lc); // create a BP network with the given sizes and hyper-parameters
double rand_normal(); // return a pseudo-random double (approximately normal) for weight initialisation
// Logistic (sigmoid) activation: maps any real input into (0, 1).
double sigmoid(double net) {
    const double denom = 1.0 + std::exp(-net);
    return 1.0 / denom;
}
// NOTE(review): despite the MATLAB-style name "purelin" (which denotes a
// linear/identity activation), this function computes the logistic sigmoid.
// The training code's output-layer derivative O2*(1-O2) matches sigmoid, so
// changing this to a true identity would require a coordinated change there.
double purelin(double net) {
    const double denom = 1.0 + std::exp(-net);
    return 1.0 / denom;
}
BPNN train_bp(vector<vector<double>> x, vector<vector<double>> y, BPNN bp); // train the network on samples (x, y)
void use_bp(BPNN bp, vector<vector<double>> inoput); // forward-pass inference (the "inoput" typo is only the declaration's parameter name; harmless)
int main() {
//样本数目
int sample_count = 100;
//输入向量维数
int input_count = 1;
//输出向量维数
int output_count = 1;
//实际使用隐层神经元数目
int hidden_count = 4;
//学习速率
double study_rate = 0.02;
//精度控制参数
double precision = 0.001;
//循环次数
int loop_count = 10000;
int i;
double temp, temp1;
//训练样本
vector<vector<double>> x;
vector<vector<double>> y;
vector<vector<double>> input;
x.resize(sample_count);
for (i = 0; i < sample_count; i++) {
x[i].resize(input_count);
}
y.resize(sample_count);
for (i = 0; i < sample_count; i++) {
y[i].resize(output_count);
}
input.resize(sample_count);
for (i = 0; i < sample_count; i++) {
input[i].resize(input_count);
}
//自定义输入与输出
for (i = 0; i < sample_count; i++) {
temp = (double)i;
temp1 = (double)sample_count;
x[i][0] = pi / temp1 * temp;
input[i][0] = pi / temp1 * temp;
y[i][0] = 1.0*sin(x[i][0]);
}
BPNN bp;
bp = CREATE_BPNN(sample_count, input_count, output_count, hidden_count, study_rate, precision, loop_count);
bp = train_bp(x, y, bp);
use_bp(bp, input);
return 0;
}
// Forward pass through a trained BP network, printing every output value.
// FIX: the original hard-coded 100 samples in four places; any other input
// size caused out-of-bounds access or skipped rows. The loops now run over
// input.size(), which is backward compatible with the existing 100-sample caller.
void use_bp(BPNN bp, vector<vector<double>> input) {
    const size_t n = input.size();
    vector<double> O1(bp.hidden_count); // hidden-layer activations
    vector<vector<double>> output(n, vector<double>(bp.output_count));
    for (size_t i = 0; i < n; i++) {
        // hidden layer: O1 = sigmoid(input_i * v)
        for (int j = 0; j < bp.hidden_count; j++) {
            double acc = 0;
            for (int k = 0; k < bp.input_count; k++) {
                acc += input[i][k] * bp.v[k][j];
            }
            O1[j] = sigmoid(acc);
        }
        // output layer: output_i = sigmoid(O1 * w)
        for (int j = 0; j < bp.output_count; j++) {
            double acc = 0;
            for (int k = 0; k < bp.hidden_count; k++) {
                acc += O1[k] * bp.w[k][j];
            }
            output[i][j] = sigmoid(acc);
        }
    }
    for (size_t i = 0; i < n; i++) {
        for (int j = 0; j < bp.output_count; j++) {
            printf("%f ", output[i][j]);
        }
    }
    printf("\n结束\n");
}
// Train the BP network with per-sample (stochastic) gradient descent.
// Stops when the epoch's accumulated squared error drops to bp.precision
// or after bp.loop_count epochs; returns the network with updated weights.
BPNN train_bp(vector<vector<double>> x, vector<vector<double>> y, BPNN bp) {
    double f, a;
    int hc, sc, lc, ic, oc;
    f = bp.precision;     // precision threshold that ends training
    a = bp.study_rate;    // learning rate
    hc = bp.hidden_count; // hidden-layer width
    sc = bp.sample_count; // number of training samples
    lc = bp.loop_count;   // maximum number of epochs
    ic = bp.input_count;  // input dimensionality
    oc = bp.output_count; // output dimensionality
    // weight-correction (delta) vectors
    vector<double> chg_h; // hidden-layer deltas
    chg_h.resize(hc);
    vector<double> chg_o; // output-layer deltas
    chg_o.resize(oc);
    vector<double> O1;    // hidden-layer activations
    O1.resize(hc);
    vector<double> O2;    // output-layer activations
    O2.resize(oc);
    // temporaries
    double temp;
    int i, j, m, n;
    double mse; // NOTE(review): misnamed -- accumulates sum(y^2), then is reported as e/sum(y^2), i.e. a relative squared error, not an RMSE
    double e;   // squared error accumulated over one epoch
    e = f + 1;  // guarantee the loop body runs at least once
    for (n = 0; (e > f) && (n < lc); n++) { // n counts epochs
        e = 0;
        mse = 0;
        // one pass over every training sample
        for (i = 0; i < sc; i++) {
            // hidden-layer output: O1 = sigmoid(x_i * v)
            for (m = 0; m < hc; m++) {
                temp = 0;
                for (j = 0; j < ic; j++) {
                    temp = temp + x[i][j] * bp.v[j][m];
                }
                O1[m] = sigmoid(temp);
            }
            // network output: O2 = purelin(O1 * w)
            // (purelin is implemented identically to sigmoid in this file,
            // consistent with the O2*(1-O2) derivative used below)
            for (m = 0; m < oc; m++) {
                temp = 0;
                for (j = 0; j < hc; j++) {
                    temp = temp + O1[j] * bp.w[j][m];
                }
                O2[m] = purelin(temp);
            }
            // output-layer delta: sigmoid'(net) * (target - output)
            for (j = 0; j < oc; j++) {
                chg_o[j] = O2[j] * (1 - O2[j])*(y[i][j] - O2[j]);
            }
            // hidden-layer delta, back-propagated through w
            for (j = 0; j < hc; j++) {
                temp = 0;
                for (m = 0; m < oc; m++) {
                    temp = temp + bp.w[j][m] * chg_o[m];
                }
                chg_h[j] = temp * O1[j] * (1 - O1[j]);
            }
            // accumulate squared error and sum of squared targets
            for (j = 0; j < oc; j++) {
                e = e + (y[i][j] - O2[j])*(y[i][j] - O2[j]);
                mse = mse + y[i][j] * y[i][j];
            }
            // apply the corrections to both weight matrices
            for (j = 0; j < hc; j++) {
                for (m = 0; m < oc; m++) {
                    bp.w[j][m] = bp.w[j][m] + a * O1[j] * chg_o[m];
                }
            }
            for (j = 0; j < ic; j++) {
                for (m = 0; m < hc; m++) {
                    bp.v[j][m] = bp.v[j][m] + a * x[i][j] * chg_h[m];
                }
            }
        }
        // report progress every 100 epochs
        if (n % 100 == 0) {
            mse = e / mse;
            printf("误差 :%f\n", e);
            printf("均方根误差 :%f\n", mse);
            printf("当前循环次数:%d\n", n);
        }
    }
    // training finished: dump final statistics and weights
    printf("循环总次数:%d\n", n);
    printf("调整后的隐层权值矩阵:\n");
    for (i = 0; i < ic; i++) {
        for (j = 0; j < hc; j++) {
            printf("%f ", bp.v[i][j]);
        }
        printf("\n");
    }
    printf("调整后的输出层权值矩阵:\n");
    for (i = 0; i < hc; i++) {
        for (j = 0; j < oc; j++) {
            printf("%f ", bp.w[i][j]);
        }
        printf("\n");
    }
    printf("神经网络训练结束:\n");
    printf("最终误差:%f\n", e);
    return bp;
}
// Build a BPNN: store the sizes and hyper-parameters, then fill both weight
// matrices with small random values from rand_normal() to break symmetry.
// v is input_count x hidden_count; w is hidden_count x output_count.
BPNN CREATE_BPNN(int sc, int ic, int oc, int hc, double sr, double p, int lc) {
    BPNN bp;
    bp.sample_count = sc;
    bp.input_count = ic;
    bp.output_count = oc;
    bp.hidden_count = hc;
    bp.study_rate = sr;
    bp.precision = p;
    bp.loop_count = lc;
    // hidden-layer weights: ic rows of hc columns, randomly initialised
    bp.v.assign(ic, vector<double>(hc));
    for (auto& row : bp.v) {
        for (auto& weight : row) {
            weight = rand_normal();
        }
    }
    // output-layer weights: hc rows of oc columns, randomly initialised
    bp.w.assign(hc, vector<double>(oc));
    for (auto& row : bp.w) {
        for (auto& weight : row) {
            weight = rand_normal();
        }
    }
    return bp;
}
//返回一个double类型的随机数,这么做的目的是破坏神经网络结构的对称性
//基本原理,参见独立同分布的中心极限定理
double rand_normal() {
int i;
const int normal_count = 200;//样本数目采用200个
double ccl_num;
double ccl_s;
double ccl_ar[normal_count];
ccl_num = 0;
for (i = 0; i < normal_count; i++) {
ccl_ar[i] = rand() % 1000 / (double)1001;
ccl_num += ccl_ar[i];
}
ccl_num -= (normal_count / 2);//减去0-1均匀分布的均值
ccl_s = 1.0*normal_count / 12.0;//0-1分布的方差为1/12
ccl_s = sqrt(ccl_s);
ccl_num /= ccl_s;//此时ccl_num接近标准正态分布的一个子集
ccl_num /= 100;//变为正态分布(0,0.01)的一个子集,论文中有给出证明过程
cout << " 随机值" << ccl_num << endl;
return ccl_num;
}