

1.1 安装python,numpy,matplotlib

 import numpy as np
 import matplotlib.pyplot as plt
 %matplotlib inline

 # Default plotting options for the rest of the notebook.
 plt.rcParams.update({
     'figure.figsize': (10, 10),        # default figure size
     'image.interpolation': 'nearest',  # nearest-neighbour: pixels are drawn as squares
     'image.cmap': 'gray',              # grayscale output instead of a colour map
 })

1.2 加载 caffe

import os
import sys

# Root of the Caffe checkout. Set the CAFFE_ROOT environment variable to use a
# different location than the tutorial's default. os.path.join(..., '')
# guarantees a trailing separator, because the rest of this file builds paths
# with plain string concatenation (caffe_root + 'models/...').
caffe_root = os.path.join(os.environ.get('CAFFE_ROOT', '/home/shine/caffe/'), '')
sys.path.insert(0, caffe_root + 'python')
import caffe
# If this fails with "No module named _caffe", either pycaffe was not built
# correctly or the path above is wrong.

1.3 下载CaffeNet模型,该模型是AlexNet的变形

import os
if os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
     print 'CaffeNet found.'
     print 'Downloading pre-trained CaffeNet model...'
     !../scripts/ ../models/bvlc_reference_caffenet

2 加载网络并设置输入预处理

2.1 将Caffe设置为CPU模式,并从硬盘加载网络

# Select CPU execution explicitly, as this section's heading describes.
caffe.set_mode_cpu()

# Note: deploy.prototxt is the network definition used here.
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g. do not perform dropout)

2.2 设置输入预处理


# 加载ilsvrc12数据集的图像均值 (随着Caffe一起发布的)
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)                      #对所有像素值取平均以此获取BGR的均值像素值
print 'mean-subtracted values:', zip('BGR', mu)

# 对输入数据进行变换
transformer ={'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))    #还没弄清楚?
transformer.set_mean('data', mu)              #对于每个通道,都减去BGR的均值像素值
transformer.set_raw_scale('data', 255)        #将像素值从[0,255]变换到[0,1]之间
transformer.set_channel_swap('data', (2,1,0)) #交换通道,从RGB变换到BGR

3 用CPU分类

3.1 开始分类,尽管我们只对一张图像进行分类,不过我们将batch的大小设置为50,以此来演示batching

# Size the input blob as (batch, channels, height, width): a batch of 50
# three-channel (BGR) images of 227x227 pixels.
net.blobs['data'].reshape(50, 3, 227, 227)

3.2 加载图像,并进行预处理

# Load the sample image shipped with Caffe and run it through the
# pre-processor configured for the 'data' blob.
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
transformed_image = transformer.preprocess('data', image)

cnn Python分类模型改为预测模型_python

3.3 进行识别分类

3.3.1 分类

# 将图像数据拷贝到为net分配的内存中
net.blobs['data'].data[...] = transformed_image

# 执行分类
output = net.forward()  
output_prob = output['prob'][0]  #batch中第一张图像的概率值   
print 'predicted class is:', output_prob.argmax()

输出:predicted class is: 281

3.3.1 输出类别标签

# 加载标签
labels_file = caffe_root + 'data/ilsvrc12/det_synset_words.txt'
if not os.path.exists(labels_file):
labels = np.loadtxt(labels_file, str, delimiter='\t')
# 输出对应的类别标签
print 'output label:', labels[output_prob.argmax()]

输出:output label: n02123043 tabby, tabby cat

3.3.2 输出置信度较高的前几个类别

# 查看置性度较高的几个结果
# sort top five predictions from softmax output
top_inds = output_prob.argsort()[::-1][:5]  # reverse sort and take five largest items
print 'probabilities and labels:'
zip(output_prob[top_inds], labels[top_inds])

4 测试网络的中间层输出

4.1 读取网络的结构 

每一层输出数据的形状通常为 (batch_size, channel_dim, height, width)(全连接层为 (batch_size, 输出维度)):

# 对于每一层,显示输出类型。
for layer_name, blob in net.blobs.iteritems():
      print layer_name + '\t' + str(


data	(50, 3, 227, 227)
conv1	(50, 96, 55, 55)
pool1	(50, 96, 27, 27)
norm1	(50, 96, 27, 27)
conv2	(50, 256, 27, 27)
pool2	(50, 256, 13, 13)
norm2	(50, 256, 13, 13)
conv3	(50, 384, 13, 13)
conv4	(50, 384, 13, 13)
conv5	(50, 256, 13, 13)
pool5	(50, 256, 6, 6)
fc6	(50, 4096)
fc7	(50, 4096)
fc8	(50, 1000)
prob	(50, 1000)

4.2 读取网络的参数

net.params[层名] 是一个 [weights, biases] 列表:[0] 表示 weights,形状为 (output_channels, input_channels, filter_height, filter_width);[1] 表示 biases,形状为 (output_channels,):


for layer_name, param in net.params.iteritems():
       print layer_name + '\t' + str(param[0].data.shape), str(param[1].data.shape)


conv1	(96, 3, 11, 11) (96,)
conv2	(256, 48, 5, 5) (256,)
conv3	(384, 256, 3, 3) (384,)
conv4	(384, 192, 3, 3) (384,)
conv5	(256, 192, 3, 3) (256,)
fc6	        (4096, 9216) (4096,)
fc7	        (4096, 4096) (4096,)
fc8	        (1000, 4096) (1000,)
def vis_square(data):
    """Show a stack of feature maps or filters as one square grid image.

    Args:
        data: array of shape (n, height, width) or (n, height, width, 3).
            Each of the n (height, width) slices becomes one tile.

    The values are rescaled to [0, 1], the tiles are laid out on a
    ceil(sqrt(n)) x ceil(sqrt(n)) grid separated by a one-pixel white border,
    and the result is drawn with the module-level ``plt`` (axes hidden).
    Nothing is returned.
    """
    # Normalise to [0, 1]. float() forces true division even for integer input
    # under Python 2; a constant input has zero range, so it is mapped to all
    # zeros instead of dividing by zero.
    lowest = data.min()
    span = float(data.max() - lowest)
    if span:
        data = (data - lowest) / span
    else:
        data = np.zeros_like(data, dtype=float)

    # Round the number of tiles up to a perfect square.
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),   # blank tiles to fill the grid
                (0, 1), (0, 1))                # one-pixel gap between adjacent tiles
               + ((0, 0),) * (data.ndim - 3))  # leave a trailing colour axis unpadded
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # Tile the filters into a single image: split the first axis into grid
    # rows and columns, interleave them with the pixel axes, then merge.
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    # Display the grid; without these calls the function computes the image
    # and then discards it.
    plt.imshow(data)
    plt.axis('off')

# NOTE: run each visualisation below in its own notebook cell (or call
# plt.figure() between them); otherwise later plots draw over earlier ones.

# net.params[<layer>] is a [weights, biases] list; index 0 selects the weights.
# The transpose moves axis 1 to the end, giving the (n, height, width, 3)
# layout that vis_square accepts.
filters = net.params['conv1'][0].data
vis_square(filters.transpose(0, 2, 3, 1))

# conv1 output for the first image in the batch, first 36 channels only.
feat = net.blobs['conv1'].data[0, :36]
vis_square(feat)

# pool5 output for the first image in the batch.
feat = net.blobs['pool5'].data[0]
vis_square(feat)

# fc6 output for the first image in the batch: the raw values on top, and a
# histogram of the positive values below.
feat = net.blobs['fc6'].data[0]
plt.subplot(2, 1, 1)
plt.plot(feat.flat)
plt.subplot(2, 1, 2)
_ = plt.hist(feat.flat[feat.flat > 0], bins=100)