本文共包含三个文件,均可直接跑通:
resnet_utils.py、resnet_v2.py、resnet_v2 + cifar.py(前两个文件下载自 TensorFlow 官方代码,其中的中文注释为网上摘抄与个人理解;第三个文件取自小蚂蚁的博客)
resnet_utils.py
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains building blocks for various versions of Residual Networks.
Residual networks (ResNets) were proposed in:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015
More variants were introduced in:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016
We can obtain different ResNet variants by changing the network depth, width,
and form of residual unit. This module implements the infrastructure for
building them. Concrete ResNet units and full ResNet networks are implemented in
the accompanying resnet_v1.py and resnet_v2.py modules.
Compared to https://github.com/KaimingHe/deep-residual-networks, in the current
implementation we subsample the output activations in the last residual unit of
each block, instead of subsampling the input activations in the first residual
unit of each block. The two implementations give identical results but our
implementation is more memory efficient.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import tensorflow as tf
slim = tf.contrib.slim
class Block(collections.namedtuple('Block', 'scope unit_fn args')):
    """Named tuple that describes a single ResNet block.

    Fields:
      scope: The scope of the `Block`.
      unit_fn: The ResNet unit function; it receives a `Tensor` and returns
        another `Tensor` holding the output of the ResNet unit.
      args: A list with one entry per unit in the `Block`. Each entry holds
        the depth, depth_bottleneck and stride arguments that are handed to
        `unit_fn` for that unit.
    """
def subsample(inputs, factor, scope=None):
    """Subsamples the input along the spatial dimensions.

    A factor of 1 is a no-op; any other factor is implemented as a 1x1
    max-pool whose stride equals the factor.

    Args:
      inputs: A `Tensor` of size [batch, height_in, width_in, channels].
      factor: The subsampling factor.
      scope: Optional variable_scope.

    Returns:
      output: A `Tensor` of size [batch, height_out, width_out, channels]
        with the input, either intact (if factor == 1) or subsampled
        (otherwise).
    """
    if factor != 1:
        # A 1x1 pooling window with stride `factor` simply keeps every
        # `factor`-th activation along height and width.
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
    return inputs
def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
    """Strided 2-D convolution with 'SAME' padding.

    When stride > 1, then we do explicit zero-padding, followed by conv2d
    with 'VALID' padding.

    Note that

       net = conv2d_same(inputs, num_outputs, 3, stride=stride)

    is equivalent to

       net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
       net = subsample(net, factor=stride)

    whereas

       net = slim.conv2d(inputs, num_outputs, 3, stride=stride,
                         padding='SAME')

    is different when the input's height or width is even, which is why this
    function exists. For more details, see
    ResnetUtilsTest.testConv2DSameEven().

    Args:
      inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
      num_outputs: An integer, the number of output filters.
      kernel_size: An int with the kernel_size of the filters.
      stride: An integer, the output stride.
      rate: An integer, rate for atrous convolution.
      scope: Scope.

    Returns:
      output: A 4-D tensor of size [batch, height_out, width_out, channels]
        with the convolution output.
    """
    # Unit stride: the built-in 'SAME' padding already does the right thing.
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           rate=rate, padding='SAME', scope=scope)

    # Larger stride: pad explicitly with zeros. The total padding is the
    # effective (dilated) kernel size minus one; the leading side gets the
    # floor of half of it and the trailing side gets the remainder.
    effective_kernel = kernel_size + (kernel_size - 1) * (rate - 1)
    total_pad = effective_kernel - 1
    pad_before = total_pad // 2
    pad_after = total_pad - pad_before
    # Only the two spatial axes (height, width) are padded.
    paddings = [[0, 0],
                [pad_before, pad_after],
                [pad_before, pad_after],
                [0, 0]]
    padded = tf.pad(inputs, paddings)
    return slim.conv2d(padded, num_outputs, kernel_size, stride=stride,
                       rate=rate, padding='VALID', scope=scope)
@slim.add_arg_scope
def stack_blocks_dense(net, blocks, output_stride=None,
                       outputs_collections=None):
    """Stacks ResNet `Blocks` and controls output feature density.

    First, this function creates scopes for the ResNet in the form of
    'block_name/unit_1', 'block_name/unit_2', etc.

    Second, this function allows the user to explicitly control the ResNet
    output_stride, which is the ratio of the input to output spatial
    resolution. This is useful for dense prediction tasks such as semantic
    segmentation or object detection.

    Most ResNets consist of 4 ResNet blocks and subsample the activations by
    a factor of 2 when transitioning between consecutive ResNet blocks. This
    results in a nominal ResNet output_stride equal to 8. If we set the
    output_stride to half the nominal network stride (e.g., output_stride=4),
    then we compute responses twice.

    Control of the output feature density is implemented by atrous
    convolution.

    Args:
      net: A `Tensor` of size [batch, height, width, channels].
      blocks: A list of length equal to the number of ResNet `Blocks`. Each
        element is a ResNet `Block` object describing the units in the
        `Block`.
      output_stride: If `None`, then the output will be computed at the
        nominal network stride. If output_stride is not `None`, it specifies
        the requested ratio of input to output spatial resolution, which
        needs to be equal to the product of unit strides from the start up
        to some level of the ResNet. For example, if the ResNet employs
        units with strides 1, 2, 1, 3, 4, 1, then valid values for the
        output_stride are 1, 2, 6, 24 or None (which is equivalent to
        output_stride=24).
      outputs_collections: Collection to add the ResNet block outputs.

    Returns:
      net: Output tensor with stride equal to the specified output_stride.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    # Effective stride of the activations produced so far. Tracking it lets
    # us switch to atrous convolution as soon as applying another strided
    # unit would exceed the requested output_stride.
    current_stride = 1
    # Atrous (dilation) rate handed to units once the target is reached.
    rate = 1
    # Whether the caller asked for an explicit output stride at all.
    stride_is_limited = output_stride is not None

    # Outer loop walks the blocks, inner loop walks the residual units.
    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            for unit_number, unit in enumerate(block.args, start=1):
                if stride_is_limited and current_stride > output_stride:
                    raise ValueError(
                        'The target output_stride cannot be reached.')

                # Together with the enclosing block scope this names the
                # unit 'block_name/unit_N'.
                with tf.variable_scope('unit_%d' % unit_number, values=[net]):
                    # `unit` is a dict of keyword arguments for unit_fn,
                    # e.g. {'depth': 256, 'depth_bottleneck': 64,
                    # 'stride': 1}.
                    unit_stride = unit.get('stride', 1)
                    if stride_is_limited and current_stride == output_stride:
                        # Target stride reached: run the unit with stride 1
                        # and fold its nominal stride into the atrous rate
                        # used by the following units.
                        net = block.unit_fn(net, rate=rate,
                                            **dict(unit, stride=1))
                        rate *= unit_stride
                    else:
                        net = block.unit_fn(net, rate=1, **unit)
                        current_stride *= unit_stride
            # Register the block output under the block's scope name.
            net = slim.utils.collect_named_outputs(outputs_collections,
                                                   sc.name, net)

    if stride_is_limited and current_stride != output_stride:
        raise ValueError('The target output_stride cannot be reached.')

    return net
def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True,
                     activation_fn=tf.nn.relu,
                     use_batch_norm=True):
    """Defines the default ResNet arg scope.

    An arg scope supplies default keyword arguments for the listed slim
    layer functions.

    TODO(gpapan): The batch-normalization related default values above are
      appropriate for use in conjunction with the reference ResNet models
      released at https://github.com/KaimingHe/deep-residual-networks. When
      training ResNets from scratch, they might need to be tuned.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      batch_norm_decay: The moving average decay when estimating layer
        activation statistics in batch normalization.
      batch_norm_epsilon: Small constant to prevent division by zero when
        normalizing activations by their variance in batch normalization.
      batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale
        the activations in the batch normalization layer.
      activation_fn: The activation function which is used in ResNet.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      An `arg_scope` to use for the resnet models.
    """
    batch_norm_params = dict(
        decay=batch_norm_decay,
        epsilon=batch_norm_epsilon,
        scale=batch_norm_scale,
        updates_collections=tf.GraphKeys.UPDATE_OPS,
        fused=None,  # Use fused batch norm if possible.
    )
    if use_batch_norm:
        normalizer_fn = slim.batch_norm
    else:
        normalizer_fn = None

    # The max_pool2d scope implies padding='SAME' for pool1, which makes
    # feature alignment easier for dense prediction tasks. This is also used
    # in https://github.com/facebook/fb.resnet.torch. However the
    # accompanying code of 'Deep Residual Learning for Image Recognition'
    # uses padding='VALID' for pool1. You can switch to that choice by
    # setting slim.arg_scope([slim.max_pool2d], padding='VALID').
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
            normalizer_params=batch_norm_params), \
            slim.arg_scope([slim.batch_norm], **batch_norm_params), \
            slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
        return arg_sc
resnet_v2.py
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions for the preactivation form of Residual Networks.
Residual networks (ResNets) were originally proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
The full preactivation 'v2' ResNet variant implemented in this module was
introduced by:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
The key difference of the full preactivation 'v2' variant compared to the
'v1' variant in [1] is the use of batch normalization before every weight layer.
Typical use:
from tensorflow.contrib.slim.nets import resnet_v2
ResNet-101 for image classification into 1000 classes:
# inputs has shape [batch, 224, 224, 3]
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False)
ResNet-101 for semantic segmentation into 21 classes:
# inputs has shape [batch, 513, 513, 3]
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
net, end_points = resnet_v2.resnet_v2_101(inputs,
21,
is_training=False,
global_pool=False,
output_stride=16)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
# from nets import resnet_utils
import resnet_utils
slim = tf.contrib.slim
resnet_arg_scope = resnet_utils.resnet_arg_scope # BN relu pool
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
    """Bottleneck residual unit variant with BN before convolutions.

    This is the full preactivation residual unit variant proposed in [2]. See
    Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
    variant which has an extra bottleneck layer.

    When putting together two consecutive ResNet blocks that use this unit,
    one should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: The depth (number of output channels) of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The ResNet unit's stride. Determines the amount of downsampling
        of the units output compared to its input.
      rate: An integer, rate for atrous convolution.
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope (the name of the unit).

    Returns:
      The ResNet unit's output.
    """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        # Number of input channels; min_rank=4 requires the input shape to
        # have at least four dimensions.
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                                 scope='preact')

        # Shortcut (identity) branch.
        if depth == depth_in:
            # Channel counts already match: only subsample spatially so the
            # shortcut has the same resolution as the residual branch.
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            # Channel counts differ: project the pre-activated input with a
            # strided 1x1 convolution so it has `depth` channels.
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        # Residual branch:
        #   conv1: 1x1, stride 1,        depth_bottleneck channels
        #   conv2: 3x3, stride `stride`, depth_bottleneck channels
        #   conv3: 1x1, stride 1,        depth channels, no BN / activation
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3,
                                            stride, rate=rate, scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        # Record the unit output in `outputs_collections` under the unit's
        # scope name and return it.
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name,
                                                output)
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Generator for v2 (preactivation) ResNet models.

    This function generates a family of ResNet v2 models. See the
    resnet_v2_*() methods for specific model instantiations, obtained by
    selecting different block instantiations that produce ResNets of various
    depths. A typical `blocks` argument (the one used by resnet_v2_50) is:

        [resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
         resnet_v2_block('block2', base_depth=128, num_units=4, stride=2),
         resnet_v2_block('block3', base_depth=256, num_units=6, stride=2),
         resnet_v2_block('block4', base_depth=512, num_units=3, stride=1)]

    Training for image classification on Imagenet is usually done with
    [224, 224] inputs, resulting in [7, 7] feature maps at the output of the
    last ResNet block for the ResNets defined in [1] that have nominal stride
    equal to 32. However, for dense prediction tasks we advise that one uses
    inputs with spatial dimensions that are multiples of 32 plus 1, e.g.,
    [321, 321]. In this case the feature maps at the ResNet output will have
    spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, which greatly
    facilitates alignment of the features to the image. Using as input
    [225, 225] images results in [8, 8] feature maps at the output of the
    last ResNet block.

    For dense prediction tasks, the ResNet needs to run in
    fully-convolutional (FCN) mode and global_pool needs to be set to False.
    The ResNets in [1, 2] all have nominal stride equal to 32 and a good
    choice in FCN mode is to use output_stride=16 in order to increase the
    density of the computed features at small computational and memory
    overhead, cf. http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks.
        If 0 or None, we return the features before the logit layer.
      is_training: whether batch_norm layers are in training mode.
      global_pool: If True, we perform global average pooling before
        computing the logits. Set to True for image classification, False
        for dense prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution.
      include_root_block: If True, include the initial 7x7 convolution
        followed by max-pooling, if False excludes it. If excluded, `inputs`
        should be the results of an activation-less convolution.
      spatial_squeeze: if True, logits is of shape [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of
        classes. To use this parameter, the input images must be smaller
        than 300x300 pixels, in which case the output logit layer does not
        contain spatial information and can be removed.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope (the name of the whole network).

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out,
        channels_out]. If global_pool is False, then height_out and width_out
        are reduced by a factor of output_stride compared to the respective
        height_in and width_in, else both height_out and width_out equal one.
        If num_classes is 0 or None, then net is the output of the last
        ResNet block, potentially after global average pooling. If
        num_classes is a non-zero integer, net contains the pre-softmax
        activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Make conv2d, bottleneck and stack_blocks_dense record their outputs
        # in end_points_collection by default.
        with slim.arg_scope([slim.conv2d, bottleneck,
                             resnet_utils.stack_blocks_dense],
                            outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.')
                        # The root block below already subsamples by 4.
                        # Floor division keeps the stride an integer even
                        # with `from __future__ import division`; it is exact
                        # because divisibility by 4 was just checked.
                        output_stride //= 4
                    # We do not include batch normalization or activation
                    # functions in conv1 because the first ResNet unit will
                    # perform these. Cf. Appendix of [2].
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None):
                        # Root 7x7 convolution, 64 output channels, stride 2.
                        net = resnet_utils.conv2d_same(net, 64, 7, stride=2,
                                                       scope='conv1')
                    # 3x3 max-pool with stride 2; after these two stride-2
                    # layers the spatial size is 1/4 of the input.
                    net = slim.max_pool2d(net, [3, 3], stride=2,
                                          scope='pool1')
                # Stack all residual blocks.
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # This is needed because the pre-activation variant does not
                # have batch normalization or activation functions in the
                # residual unit output. See Appendix of [2].
                net = slim.batch_norm(net, activation_fn=tf.nn.relu,
                                      scope='postnorm')
                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                if global_pool:
                    # Global average pooling over height and width.
                    net = tf.reduce_mean(net, [1, 2], name='pool5',
                                         keep_dims=True)
                    end_points['global_pool'] = net
                # Truthiness test: both None and 0 mean "no logits layer",
                # as documented above.
                if num_classes:
                    # Classifier: 1x1 convolution with num_classes outputs.
                    net = slim.conv2d(net, num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None, scope='logits')
                    end_points[sc.name + '/logits'] = net
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                        end_points[sc.name + '/spatial_squeeze'] = net
                    # Softmax over the logits; exposed through end_points
                    # only, `net` stays pre-softmax.
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points


resnet_v2.default_image_size = 224
def resnet_v2_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v2 bottleneck block.

    Example:
      resnet_v2_block('block1', base_depth=64, num_units=3, stride=2)
      describes two units with (depth=256, depth_bottleneck=64, stride=1)
      followed by one unit with (depth=256, depth_bottleneck=64, stride=2).

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the last
        unit. All other units have stride=1.

    Returns:
      A resnet_v2 bottleneck block.
    """
    def _unit_args(unit_stride):
        # Keyword arguments for one `bottleneck` unit; the unit output is
        # four times as deep as its bottleneck layers.
        return {
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': unit_stride,
        }

    # All units but the last use stride 1; the last one carries the block
    # stride.
    units = [_unit_args(1)] * (num_units - 1)
    units.append(_unit_args(stride))
    return resnet_utils.Block(scope, bottleneck, units)
resnet_v2.default_image_size = 224
def resnet_v2_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='resnet_v2_50'):
    """ResNet-50 model of [1]. See resnet_v2() for arg and return description.

    The four blocks hold 3, 4, 6 and 3 bottleneck units, which by the
    conventional count gives (3 + 4 + 6 + 3) * 3 + 2 = 50 layers. The first
    three blocks each end in a stride-2 unit, so together with the root
    block a 224x224 input is reduced to 224 / (4 * 2 * 2 * 2) = 7, and the
    last block outputs 512 * 4 = 2048 channels.
    """
    # Equivalent per-unit (depth, depth_bottleneck, stride) layout:
    #   block1: [(256, 64, 1)] * 2 + [(256, 64, 2)]
    #   block2: [(512, 128, 1)] * 3 + [(512, 128, 2)]
    #   block3: [(1024, 256, 1)] * 5 + [(1024, 256, 2)]
    #   block4: [(2048, 512, 1)] * 3
    blocks = [
        resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v2_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v2_block('block3', base_depth=256, num_units=6, stride=2),
        resnet_v2_block('block4', base_depth=512, num_units=3, stride=1),
    ]
    return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, spatial_squeeze=spatial_squeeze,
                     reuse=reuse, scope=scope)


resnet_v2_50.default_image_size = resnet_v2.default_image_size
def resnet_v2_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v2_101'):
    """ResNet-101 model of [1]. See resnet_v2() for arg and return description.

    The four blocks hold 3, 4, 23 and 3 bottleneck units, which by the
    conventional count gives (3 + 4 + 23 + 3) * 3 + 2 = 101 layers. The first
    three blocks each end in a stride-2 unit, so together with the root
    block a 224x224 input is reduced to 224 / (4 * 2 * 2 * 2) = 7, and the
    last block outputs 512 * 4 = 2048 channels.
    """
    # One (scope, base_depth, num_units, stride) row per block.
    layout = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 23, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v2_block(name, base_depth=width, num_units=count,
                        stride=block_stride)
        for name, width, count, block_stride in layout
    ]
    return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, spatial_squeeze=spatial_squeeze,
                     reuse=reuse, scope=scope)


resnet_v2_101.default_image_size = resnet_v2.default_image_size
def resnet_v2_152(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    """ResNet-152 model of [1]. See resnet_v2() for arg and return description.

    The four blocks hold 3, 8, 36 and 3 bottleneck units, which by the
    conventional count gives (3 + 8 + 36 + 3) * 3 + 2 = 152 layers. The first
    three blocks each end in a stride-2 unit, so together with the root
    block a 224x224 input is reduced to 224 / (4 * 2 * 2 * 2) = 7, and the
    last block outputs 512 * 4 = 2048 channels.
    """
    # One (scope, base_depth, num_units, stride) row per block.
    layout = (
        ('block1', 64, 3, 2),
        ('block2', 128, 8, 2),
        ('block3', 256, 36, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v2_block(name, base_depth=width, num_units=count,
                        stride=block_stride)
        for name, width, count, block_stride in layout
    ]
    return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, spatial_squeeze=spatial_squeeze,
                     reuse=reuse, scope=scope)


resnet_v2_152.default_image_size = resnet_v2.default_image_size
def resnet_v2_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    """ResNet-200 model of [2]. See resnet_v2() for arg and return description.

    The four blocks hold 3, 24, 36 and 3 bottleneck units, which by the
    conventional count gives (3 + 24 + 36 + 3) * 3 + 2 = 200 layers. The
    first three blocks each end in a stride-2 unit, so together with the
    root block a 224x224 input is reduced to 224 / (4 * 2 * 2 * 2) = 7, and
    the last block outputs 512 * 4 = 2048 channels.
    """
    # One (scope, base_depth, num_units, stride) row per block.
    layout = (
        ('block1', 64, 3, 2),
        ('block2', 128, 24, 2),
        ('block3', 256, 36, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v2_block(name, base_depth=width, num_units=count,
                        stride=block_stride)
        for name, width, count, block_stride in layout
    ]
    return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, spatial_squeeze=spatial_squeeze,
                     reuse=reuse, scope=scope)


resnet_v2_200.default_image_size = resnet_v2.default_image_size
if __name__ == '__main__':
    # Stand-alone benchmark: time the forward pass of ResNet-50 v2 on random
    # input.
    from datetime import datetime
    import math
    import time

    def time_tensorflow_run(session, target, info_string, num_batches=10):
        """Time repeated `session.run(target)` calls and print statistics.

        The first 10 runs are warm-up and are excluded from the statistics.
        After warm-up, the duration of every 10th step is printed, and at
        the end the mean and standard deviation over `num_batches` timed
        runs are printed.

        Args:
          session: Session used to run `target`.
          target: The fetch passed to `session.run`.
          info_string: Label included in the summary line.
          num_batches: Number of timed runs after the warm-up phase.
        """
        num_steps_burn_in = 10
        total_duration = 0.0
        total_duration_squared = 0.0
        for i in range(num_batches + num_steps_burn_in):
            start_time = time.time()
            # Only the elapsed time matters; the fetched value is discarded.
            session.run(target)
            duration = time.time() - start_time
            if i >= num_steps_burn_in:
                if not i % 10:
                    print('%s: step %d, duration = %.3f' %
                          (datetime.now(), i - num_steps_burn_in, duration))
                total_duration += duration
                total_duration_squared += duration * duration
        mn = total_duration / num_batches
        vr = total_duration_squared / num_batches - mn * mn
        # Rounding can push a near-zero variance slightly below zero, which
        # would make math.sqrt raise ValueError; clamp it.
        sd = math.sqrt(max(vr, 0.0))
        print('%s: %s across %d steps, %.3f +/- %3.3f sec/batch' %
              (datetime.now(), info_string, num_batches, mn, sd))

    batch_size = 32
    height, width = 224, 224
    num_batches = 10
    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope(resnet_arg_scope()):
        # `net` holds the pre-softmax logits; the softmax output is in
        # end_points['predictions'].
        net, end_points = resnet_v2_50(inputs, 1000)
    init = tf.global_variables_initializer()
    # The context manager closes the session when the benchmark is done.
    with tf.Session() as sess:
        sess.run(init)
        time_tensorflow_run(sess, net, 'Forward', num_batches)
resnet_v2 + cifar.py
import tensorflow as tf
import os
import numpy as np
import pickle
import resnet_v2
import resnet_utils
slim = tf.contrib.slim
resnet_arg_scope = resnet_utils.resnet_arg_scope
# 文件存放目录
CIFAR_DIR = "../cifar/cifar-10-batches-py"
def load_data(filename):
    """Read one pickled batch file and return its data and labels.

    The file is unpickled with encoding='bytes' (required on Python 3), so
    the keys of the loaded dict are bytes objects.

    Args:
      filename: Path of the pickled batch file.

    Returns:
      A `(data, labels)` tuple taken from the `b'data'` and `b'labels'`
      entries of the unpickled dict.
    """
    with open(filename, 'rb') as batch_file:
        # NOTE(review): unpickling can execute arbitrary code; only load
        # batch files from a trusted source.
        batch = pickle.load(batch_file, encoding='bytes')
    images = batch[b'data']
    labels = batch[b'labels']
    return images, labels
class CifarData:
    """In-memory dataset that serves consecutive mini-batches.

    All batch files are loaded up front, the data is rescaled with
    `x / 127.5 - 1`, and batches are handed out sequentially. With shuffling
    enabled the examples are permuted at construction time and again every
    time the end of the data is reached.
    """

    def __init__(self, filenames, need_shuffle):
        """Load and preprocess every file in `filenames`.

        Args:
          filenames: Iterable of batch-file paths readable by `load_data`.
          need_shuffle: Whether to shuffle the examples (now and whenever
            the data has been exhausted).
        """
        loaded = [load_data(name) for name in filenames]
        # Stack the per-file data arrays row-wise into one matrix and rescale
        # the values; raw values in [0, 255] end up in [-1, 1].
        stacked = np.vstack([data for data, _ in loaded])
        self._data = stacked / 127.5 - 1
        # Concatenate the per-file label lists into one flat array.
        self._labels = np.hstack([labels for _, labels in loaded])
        # Total number of examples.
        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        # Index of the first example of the next batch.
        self._indicator = 0
        if self._need_shuffle:
            self._shffle_data()

    def _shffle_data(self):
        """Apply one random permutation to data and labels alike."""
        order = np.random.permutation(self._num_examples)
        self._data, self._labels = self._data[order], self._labels[order]

    def next_batch(self, batch_size):
        """Return the next `batch_size` examples as `(data, labels)`.

        Raises:
          Exception: If the data is exhausted and shuffling is disabled, or
            if `batch_size` exceeds the number of examples.
        """
        stop = self._indicator + batch_size
        if stop > self._num_examples:
            if not self._need_shuffle:
                raise Exception("have no more examples")
            # Start a new pass over freshly shuffled data; the examples left
            # at the tail of the previous pass are skipped.
            self._shffle_data()
            self._indicator = 0
            stop = batch_size
        # Still out of range after restarting: the batch cannot fit at all.
        if stop > self._num_examples:
            raise Exception("batch size is larger than all example")
        start = self._indicator
        self._indicator = stop
        return self._data[start:stop], self._labels[start:stop]
# Paths of the five training batch files, data_batch_1 .. data_batch_5.
train_filename = [
    os.path.join(CIFAR_DIR, 'data_batch_%d' % batch_no)
    for batch_no in range(1, 6)
]
# Path of the single test batch file.
test_filename = [os.path.join(CIFAR_DIR, 'test_batch')]

# Load both splits; only the training split is shuffled.
train_data = CifarData(filenames=train_filename, need_shuffle=True)
test_data = CifarData(filenames=test_filename, need_shuffle=False)
# ---- Hyper-parameters ----
batch_size = 20
train_steps = 10000
test_steps = 100

# ---- Computation graph ----
# Flattened images: one row of 3072 values per example; None leaves the
# number of examples open.
x = tf.placeholder(tf.float32, [None, 3072])
# One integer class label per example, e.g. [0, 5, 6, 3].
y = tf.placeholder(tf.int64, [None])

# Turn every flat row into a 3 x 32 x 32 array and move the channel axis
# last, giving [batch, 32, 32, 3] as expected by the network.
x_image = tf.transpose(tf.reshape(x, [-1, 3, 32, 32]), perm=[0, 2, 3, 1])

# NOTE(review): the `with slim.arg_scope(resnet_arg_scope()):` wrapper was
# commented out in the original script, so the network is built with slim's
# default layer arguments rather than the ResNet arg scope, and is_training
# keeps its default of True for both training and testing - confirm this is
# intended.
net, end_points = resnet_v2.resnet_v2_50(x_image, 10)
# Pre-softmax logits, dtype float32.
y_ = net

# Cross-entropy loss computed from integer labels and logits.
loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)

# Predicted class = index of the largest logit in each row (int64).
predict = tf.argmax(y_, 1)
# Boolean vector: True where the prediction equals the label.
correct_prediction = tf.equal(predict, y)
# Accuracy = mean of the booleans cast to floating point.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

# Group the optimizer ops under the 'train_op' name scope.
with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

# Variable initializer; must be run once before training.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before the first training step.
    sess.run(init)

    for step in range(1, train_steps + 1):
        # One optimization step on the next training batch; also fetch the
        # loss and accuracy on that batch.
        images, labels = train_data.next_batch(batch_size)
        train_loss, train_acc, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={x: images, y: labels})

        # Report training progress every 500 steps.
        if step % 500 == 0:
            print('[Train] Step: %d, loss: %4.5f, acc: %4.5f'
                  % (step, train_loss, train_acc))

        # Evaluate every 5000 steps.
        if step % 5000 != 0:
            continue

        # Reload the test set (unshuffled) so that batches start from the
        # first example again.
        test_data = CifarData(test_filename, False)
        batch_accuracies = []
        for _ in range(test_steps):
            test_images, test_labels = test_data.next_batch(batch_size)
            batch_accuracies.append(
                sess.run(accuracy,
                         feed_dict={x: test_images, y: test_labels}))
        test_acc = np.mean(batch_accuracies)
        print('[Test ] Step: %d, acc: %4.5f' % (step, test_acc))