I'll keep the background short: it is covered by a screenshot, and the figure shows the overall network structure (AlexNet).
This post implements a complete AlexNet convolutional neural network and benchmarks the speed of its forward and backward passes. No real data is loaded and no actual training is done. Note that the code matches the original network in structure, but many details differ, such as kernel sizes and strides. Even so, it gives a clear picture of the whole network. If you don't want to follow the step-by-step breakdown, scroll to the end, copy the full code, and run it to see the effect; nothing needs to be modified.
---------------------------------------------------- Code walkthrough --------------------------------------
# Import the required standard libraries and TensorFlow
from datetime import datetime
import math
import time
import tensorflow as tf

# Test 100 batches in total, 32 images per batch
batch_size = 32
num_batches = 100

# Helper that prints the name and output shape of a conv or pooling layer
def print_activations(t):
    print(t.op.name, '', t.get_shape().as_list())
def inference(images):
    '''
    :param images: input images, shape 32*224*224*3
    :return: the last layer pool5 and the parameter list
    '''
    parameters = []
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding = 'SAME')
        # stride s = 4; "NHWC": the data is stored in the order [batch, height, width, channels]
        biases = tf.Variable(tf.constant(0.0, shape = [64], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name = scope)
        print_activations(conv1)  # print conv1's name and shape
        parameters += [kernel, biases]
    # Local response normalization after the first conv layer, to improve generalization
    # lrn1 = tf.nn.lrn(conv1, 4, bias = 1.0, alpha = 0.001 / 9, beta = 0.75, name = 'lrn1')
    pool1 = tf.nn.max_pool(conv1, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool1')
    print_activations(pool1)
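    # Shape check (not in the original post, added to explain the printed sizes):
    # conv1 uses SAME padding with stride 4, so 224 / 4 = 56           -> [32, 56, 56, 64]
    # pool1 uses a 3x3 window, stride 2, VALID padding, so
    # floor((56 - 3) / 2) + 1 = 27                                     -> [32, 27, 27, 64]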
    with tf.name_scope('conv2') as scope:
        kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [192], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv2)
    # lrn2 = tf.nn.lrn(conv2, 4, bias = 1.0, alpha = 0.001 / 9, beta = 0.75, name = 'lrn2')
    pool2 = tf.nn.max_pool(conv2, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool2')
    print_activations(pool2)
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [384], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv3)
    with tf.name_scope('conv4') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [256], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv4)
    with tf.name_scope('conv5') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [256], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv5)
    pool5 = tf.nn.max_pool(conv5, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool5')
    print_activations(pool5)
    return pool5, parameters
That completes the convolutional part. The LRN (local response normalization) used in the original AlexNet has largely been abandoned in later work because its benefit is marginal; here, enabling it slows computation by about one third while barely affecting accuracy, so it is optional.
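If you do want to try LRN, a minimal sketch (reusing the variable names from inference above) is to uncomment the LRN op and pool over its output instead of the raw activation; for the first layer:

    # Inside inference(), after conv1: apply LRN before pooling (optional, slower)
    lrn1 = tf.nn.lrn(conv1, 4, bias = 1.0, alpha = 0.001 / 9, beta = 0.75, name = 'lrn1')
    pool1 = tf.nn.max_pool(lrn1, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool1')

The same change applies to the lrn2/pool2 pair after the second convolution.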
Next is the timing function.
def time_tensorflow_run(session, target, info_string):
    '''
    :param session: the session to run in
    :param target: the op to benchmark
    :param info_string: name of the test
    :return:
    '''
    num_steps_burn_in = 10  # warm up for 10 iterations; the first runs include heavy memory/device setup, so timing starts afterwards
    total_duration = 0.0  # accumulated total time
    total_duration_squared = 0.0  # accumulated sum of squared times (for the standard deviation)
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s: step %d, duration = %.3f' %(datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches  # mean time per batch
    vr = total_duration_squared / num_batches - mn * mn
    sd = math.sqrt(vr)  # standard deviation of the time
    print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %(datetime.now(), info_string, num_batches, mn, sd))
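The mean and standard deviation above rely on the identity Var(x) = E[x^2] - (E[x])^2. A quick sanity check of that shortcut with numpy, using made-up per-batch times (not from this benchmark):

    import numpy as np

    durations = np.array([0.46, 0.47, 0.52, 0.46])           # hypothetical per-batch times
    mn = durations.sum() / len(durations)                     # running mean, as in the function above
    vr = (durations ** 2).sum() / len(durations) - mn * mn    # E[x^2] - (E[x])^2
    print(mn, np.sqrt(vr))                                    # matches np.mean / np.std (population std)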
Finally, the main function.
# Main function
def run_benchmark():
    with tf.Graph().as_default():  # use a new default graph
        # Instead of ImageNet data, random images are used as input
        image_size = 224
        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3], dtype = tf.float32, stddev = 1e-1))
        pool5, parameters = inference(images)
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)
        # Time the forward pass
        time_tensorflow_run(sess, pool5, "Forward")
        # Time the forward + backward pass
        objective = tf.nn.l2_loss(pool5)
        grad = tf.gradients(objective, parameters)
        time_tensorflow_run(sess, grad, "Forward-backward")
To run it, a single line is enough:
run_benchmark()
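Note that this is TensorFlow 1.x style code (Session, tf.truncated_normal, and so on). If you are on TensorFlow 2.x, one common workaround (not tested here) is to route everything through the v1 compatibility layer:

    # Replace the original "import tensorflow as tf" with the v1 compatibility import
    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()  # restores graph mode / Session semantics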
Note that LRN is commented out in my code; if you want to try it, just modify the lines after the first and second convolutions, as sketched earlier.
I did not use a GPU, so my CPU times are much slower than Huang Wenjian's GPU results: with LRN his forward pass takes 0.26 s per batch, while my corresponding figure is 0.468 s.
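If you have a GPU, one way to make sure the graph runs on it is to wrap graph construction in a device scope. A sketch, assuming a single GPU at '/gpu:0' and using a hypothetical run_benchmark_on_gpu variant of the main function:

    def run_benchmark_on_gpu():
        with tf.Graph().as_default(), tf.device('/gpu:0'):
            images = tf.Variable(tf.random_normal([batch_size, 224, 224, 3], dtype = tf.float32, stddev = 1e-1))
            pool5, parameters = inference(images)
            init = tf.global_variables_initializer()
            # allow_soft_placement lets ops without a GPU kernel fall back to the CPU
            sess = tf.Session(config = tf.ConfigProto(allow_soft_placement = True))
            sess.run(init)
            time_tensorflow_run(sess, pool5, "Forward (GPU)")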
The output consists of three parts: the layer shapes, the forward timings, and the forward-backward timings.
conv1 [32, 56, 56, 64]
conv1/pool1 [32, 27, 27, 64]
conv2 [32, 27, 27, 192]
conv2/pool2 [32, 13, 13, 192]
conv3 [32, 13, 13, 384]
conv4 [32, 13, 13, 256]
conv5 [32, 13, 13, 256]
conv5/pool5 [32, 6, 6, 256]
2018-04-24 15:27:52.440312: step 0, duration = 0.467
2018-04-24 15:27:57.211732: step 10, duration = 0.516
2018-04-24 15:28:02.045221: step 20, duration = 0.467
2018-04-24 15:28:06.703669: step 30, duration = 0.464
2018-04-24 15:28:11.335167: step 40, duration = 0.461
2018-04-24 15:28:15.966167: step 50, duration = 0.465
2018-04-24 15:28:20.588067: step 60, duration = 0.464
2018-04-24 15:28:25.376623: step 70, duration = 0.462
2018-04-24 15:28:29.995012: step 80, duration = 0.459
2018-04-24 15:28:34.616423: step 90, duration = 0.461
2018-04-24 15:28:38.799483: Forward across 100 steps, 0.468 +/- 0.018 sec / batch
2018-04-24 15:29:09.741985: step 0, duration = 2.689
2018-04-24 15:29:37.293994: step 10, duration = 2.749
2018-04-24 15:30:04.358533: step 20, duration = 2.696
2018-04-24 15:30:31.860958: step 30, duration = 2.783
2018-04-24 15:30:58.855577: step 40, duration = 2.688
2018-04-24 15:31:25.696813: step 50, duration = 2.689
2018-04-24 15:31:52.615997: step 60, duration = 2.688
2018-04-24 15:32:20.105809: step 70, duration = 2.699
2018-04-24 15:32:47.139107: step 80, duration = 2.704
2018-04-24 15:33:14.350406: step 90, duration = 2.981
2018-04-24 15:33:39.250186: Forward-backward across 100 steps, 2.722 +/- 0.066 sec / batch
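The layer shapes in the first block of output can be checked by hand. A small helper (not part of the original code) that reproduces the size arithmetic for SAME convolutions and VALID pooling:

    import math

    def same_conv(size, stride):
        # SAME padding: output = ceil(input / stride)
        return math.ceil(size / stride)

    def valid_pool(size, window = 3, stride = 2):
        # VALID padding: output = floor((input - window) / stride) + 1
        return (size - window) // stride + 1

    s = same_conv(224, 4)   # conv1: 224 -> 56
    s = valid_pool(s)       # pool1: 56 -> 27  (conv2 keeps 27 with stride-1 SAME)
    s = valid_pool(s)       # pool2: 27 -> 13  (conv3..conv5 keep 13)
    s = valid_pool(s)       # pool5: 13 -> 6
    print(s)                # 6, matching pool5's shape [32, 6, 6, 256]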
Finally, the complete code:
# This code builds a complete AlexNet convolutional network and benchmarks the speed of its forward and backward passes. No real data is loaded for training.
# Import the required standard libraries and TensorFlow
from datetime import datetime
import math
import time
import tensorflow as tf

# Test 100 batches in total, 32 images per batch
batch_size = 32
num_batches = 100

# Helper that prints the name and output shape of a conv or pooling layer
def print_activations(t):
    print(t.op.name, '', t.get_shape().as_list())
def inference(images):
    '''
    :param images: input images, shape 32*224*224*3
    :return: the last layer pool5 and the parameter list
    '''
    parameters = []
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding = 'SAME')
        # stride s = 4; "NHWC": the data is stored in the order [batch, height, width, channels]
        biases = tf.Variable(tf.constant(0.0, shape = [64], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name = scope)
        print_activations(conv1)  # print conv1's name and shape
        parameters += [kernel, biases]
    # Local response normalization after the first conv layer, to improve generalization
    # lrn1 = tf.nn.lrn(conv1, 4, bias = 1.0, alpha = 0.001 / 9, beta = 0.75, name = 'lrn1')
    pool1 = tf.nn.max_pool(conv1, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool1')
    print_activations(pool1)
    with tf.name_scope('conv2') as scope:
        kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [192], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv2)
    # lrn2 = tf.nn.lrn(conv2, 4, bias = 1.0, alpha = 0.001 / 9, beta = 0.75, name = 'lrn2')
    pool2 = tf.nn.max_pool(conv2, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool2')
    print_activations(pool2)
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [384], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv3)
    with tf.name_scope('conv4') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [256], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv4)
    with tf.name_scope('conv5') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype = tf.float32, stddev = 1e-1), name = 'weights')
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [256], dtype = tf.float32), trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv5)
    pool5 = tf.nn.max_pool(conv5, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool5')
    print_activations(pool5)
    return pool5, parameters
# End of the convolutional part. The LRN used here has been abandoned in later work because its benefit is marginal; enabling it slows computation by about one third while barely affecting accuracy, so it is optional.
def time_tensorflow_run(session, target, info_string):
    '''
    :param session: the session to run in
    :param target: the op to benchmark
    :param info_string: name of the test
    :return:
    '''
    num_steps_burn_in = 10  # warm up for 10 iterations; the first runs include heavy memory/device setup, so timing starts afterwards
    total_duration = 0.0  # accumulated total time
    total_duration_squared = 0.0  # accumulated sum of squared times (for the standard deviation)
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s: step %d, duration = %.3f' %(datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches  # mean time per batch
    vr = total_duration_squared / num_batches - mn * mn
    sd = math.sqrt(vr)  # standard deviation of the time
    print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %(datetime.now(), info_string, num_batches, mn, sd))
# Main function
def run_benchmark():
    with tf.Graph().as_default():  # use a new default graph
        # Instead of ImageNet data, random images are used as input
        image_size = 224
        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3], dtype = tf.float32, stddev = 1e-1))
        pool5, parameters = inference(images)
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)
        # Time the forward pass
        time_tensorflow_run(sess, pool5, "Forward")
        # Time the forward + backward pass
        objective = tf.nn.l2_loss(pool5)
        grad = tf.gradients(objective, parameters)
        time_tensorflow_run(sess, grad, "Forward-backward")
run_benchmark()