主要参考:《深度学习——caffe之经典模型详解与实战》
kaggle数据集下载链接:https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data
数据集的说明:
一共有两个文件:
train.zip:训练集,图片文件命名格式:cat.X.jpg,dog.X.jpg,这个是一个二分类问题,需要根据cat/dog的类别进行0/1分类,即将cat->0,dog->1
test.zip:用来进行检测,命名格式:X.jpg,用来验证算法的识别准确率
step1:将cat_dog进行分类:
根据cat/dog的类别进行0/1分类,即将cat->0,dog->1
# -*- coding:utf-8 -*-
__author__ = 'xuy'
import commands
import os
import re
import random
my_project='/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/'
train_root_dir='dataset/train'
test_root_dir='dataset/test'
train_save_name='mytrain.txt'


def label_for(filename):
    """Return the class label encoded in a training image's file name.

    The label is taken from the first three characters of the name:
    'cat' -> 0, 'dog' -> 1. Any other name returns None so the caller can
    skip files that are not training images.
    """
    prefix = filename[:3]
    if prefix == 'cat':
        return 0
    if prefix == 'dog':
        return 1
    return None


# Write one "<path> <label>" line per training image. The 'with' block closes
# the list file even if the directory walk raises.
with open(train_save_name, 'w') as fw_train:
    # os.walk yields (directory, sub-directory names, file names) for every
    # directory below train_root_dir.
    for parent, dirnames, filenames in os.walk(train_root_dir):
        for filename in filenames:
            label = label_for(filename)
            if label is None:
                # Neither a cat nor a dog image: writing here would repeat the
                # previous entry (or fail on the very first file), so skip it.
                continue
            train_all_file_name = os.path.join(parent, filename)
            fw_train.write(train_all_file_name + ' ' + str(label) + '\n')
step2:根据第一步生成的txt文件生成lmdb文件
# -*- coding: UTF-8 -*-
__author__ = 'xuy'
import commands
import os
import re
import caffe
# Caffe installation root; create_lmdb looks for build/tools/convert_imageset under it.
caffe_root='/home/xuy/caffe/'
'''
我在txt文件里面已经写好了train,val的路径,train/0/***.jpg or val/***.jpg
'''
# Project directory that holds the dataset folder and the generated list file.
my_project_path='/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/'
# Folder with the training images (not referenced again by the code shown in this step).
images_train_path=my_project_path+'dataset/train/'
# print images_train_path
# Image list file named mytrain.txt, i.e. the file name written by step 1.
txt_train_path=my_project_path+'mytrain.txt'
def create_lmdb(caffe_root, images_path, txt_save_path,lmdb_save_path):
    """Convert the images named in a list file into an LMDB with convert_imageset.

    The tool is run with --shuffle and resizes every image to 32x32.

    Args:
        caffe_root: Caffe root directory; the tool is expected at
            <caffe_root>/build/tools/convert_imageset.
        images_path: image root folder handed to the tool (second argument).
        txt_save_path: list file with the image names and labels.
        lmdb_save_path: where the LMDB is written.

    Returns:
        True when the tool exits with status 0, False when it cannot be
        started or exits with a non-zero status.
    """
    # Local import: the file's import block only provides the Python 2
    # `commands` module.
    import subprocess

    # os.path.join works whether or not caffe_root ends with a slash.
    convert_imageset_path = os.path.join(caffe_root, 'build/tools/convert_imageset')
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = [
        convert_imageset_path,
        '--shuffle',
        '--resize_height=32',
        '--resize_width=32',
        images_path,
        txt_save_path,
        lmdb_save_path,
    ]
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT,
                                   universal_newlines=True)
    except OSError as error:
        print("convert_imageset could not be started: %s" % error)
        return False

    # stderr is merged into stdout so the tool's log output is shown as well.
    output = process.communicate()[0]
    if output.endswith('\n'):
        output = output[:-1]
    print(output)

    status = process.returncode
    if status == 0:
        print("lmbd文件生成成功")
        return True
    print("convert_imageset failed with exit status %d" % status)
    return False
lmdb_train_path=my_project_path+'img_train.lmdb'
# Build the training LMDB. The entries in the list file already start with
# dataset/train, so the image root (second argument) only needs to be the
# project directory prefix.
create_lmdb(caffe_root,my_project_path,txt_train_path,lmdb_train_path)
step3:制作val_dataset,将trainset当中的20%当作valset,也就是随机在trainset当中筛选20%的图片,复制到valset文件夹当中
# -*- coding:utf-8 -*-
__author__ = 'xuy'
import os
import shutil
import random
root_dir = '/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/dataset/my_train'
output_dir = '/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/dataset/val'
ref = 1


def copy_validation_split(source_root, target_dir, fraction=0.2, min_files=ref):
    """Copy a random share of the files in every folder below source_root.

    A folder holding more than ``min_files`` files contributes
    ``round(fraction * file_count)`` distinct, randomly chosen files; a
    folder with fewer files contributes all of them.

    Args:
        source_root: directory tree to sample from.
        target_dir: directory that receives the copies; created on the
            first copy if it does not exist.
        fraction: share of each folder's files to copy.
        min_files: folders with at most this many files are copied whole.

    Returns:
        The list of source paths that were copied.
    """
    copied = []
    for root, dirs, files in os.walk(source_root):
        if len(files) > min_files:
            sample_size = int(round(fraction * len(files)))
            # random.sample draws without replacement, so exactly
            # sample_size different files are selected.
            chosen = random.sample(files, sample_size)
        else:
            chosen = files
        for name in chosen:
            file_to_copy = os.path.join(root, name)
            if not os.path.isfile(file_to_copy):
                continue
            # Without an existing directory shutil.copy would create a plain
            # file named like target_dir instead of copying into it.
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)
            shutil.copy(file_to_copy, target_dir)
            print(file_to_copy)
            copied.append(file_to_copy)
    return copied


# NOTE(review): files are copied, not moved, so the selected images remain in
# the training folder as well - confirm that this overlap is intended.
copy_validation_split(root_dir, output_dir)
print('Finished !')
step4:构造神经网络:train.prototxt,套路和上一篇博客一样,基于pycaffe搭建神经网络
# -*- coding:utf-8 -*-
__author__ = 'xuy'
import caffe
# Learning-rate multipliers handed to every learnable layer through `param`.
# NOTE(review): despite the "frozen" names both multipliers are non-zero - confirm the naming.
frozen_weight_param = dict(lr_mult=1)  # multiplier for the weights
frozen_bias_param = dict(lr_mult=2)  # multiplier for the biases
# Order matters: weights entry first, bias entry second.
froozen_param = [frozen_weight_param, frozen_bias_param]
def write_layer():
    """Assemble the cat/dog network with pycaffe's NetSpec and return its proto.

    Layers are registered in this order:
      * a TRAIN-phase LMDB Data layer named 'catdog' (batch size 16, pixel
        scale 1/255) producing `data` and `label`;
      * four stages, each made of two 3x3 convolutions (pad 1, stride 1)
        with 32, 64, 128 and 256 outputs, every convolution followed by an
        in-place ReLU and every stage closed by a 2x2 max pool with stride 2;
      * ip1 -> drop1 -> ip2 -> relu9 -> drop2 -> ip3 (2 outputs);
      * SoftmaxWithLoss on ip3/label and a TEST-phase Accuracy layer.

    NOTE(review): the only Data layer is TRAIN-phase while Accuracy is
    TEST-phase - confirm where the TEST-phase net gets data/label from.
    NOTE(review): ip1 feeds drop1 without a ReLU - confirm this is intended.

    Returns:
        The value of NetSpec.to_proto() for the assembled network.
    """
    layers = caffe.layers
    net = caffe.NetSpec()

    def add_conv_relu(index, bottom, num_output):
        """Register conv<index> and its in-place relu<index>; return the conv top."""
        conv = layers.Convolution(
            bottom,
            # `param` takes the list of per-blob settings (weights, then bias).
            param=froozen_param,
            num_output=num_output,
            pad=1,
            kernel_size=3,
            stride=1,
            weight_filler=dict(type='xavier'),
            bias_filler=dict(type='constant')
        )
        setattr(net, 'conv%d' % index, conv)
        setattr(net, 'relu%d' % index, layers.ReLU(conv, in_place=True))
        return conv

    def add_max_pool(index, bottom):
        """Register pool<index>, a 2x2 max pool with stride 2; return its top."""
        pool = layers.Pooling(
            bottom,
            pool=caffe.params.Pooling.MAX,
            kernel_size=2,
            stride=2
        )
        setattr(net, 'pool%d' % index, pool)
        return pool

    def add_inner_product(name, bottom, num_output):
        """Register a fully connected layer under `name`; return its top."""
        inner = layers.InnerProduct(
            bottom,
            param=froozen_param,
            num_output=num_output,
            weight_filler=dict(type='xavier'),
            bias_filler=dict(type='constant')
        )
        setattr(net, name, inner)
        return inner

    def add_dropout(name, bottom):
        """Register an in-place dropout (ratio 0.5) under `name`."""
        setattr(net, name, layers.Dropout(
            bottom,
            in_place=True,
            dropout_param=dict(dropout_ratio=0.5)
        ))

    # Input: images and labels read from the training LMDB.
    net.data, net.label = layers.Data(
        name='catdog',
        include=dict(phase=caffe.TRAIN),
        ntop=2,
        source='/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/img_train.lmdb',
        backend=caffe.params.Data.LMDB,
        batch_size=16,
        transform_param=dict(scale=1 / 255.)
    )

    # Convolutional stages: stage k holds conv(2k-1), conv(2k) and pool k.
    bottom = net.data
    for stage, width in enumerate((32, 64, 128, 256), 1):
        first_conv = 2 * stage - 1
        bottom = add_conv_relu(first_conv, bottom, width)
        bottom = add_conv_relu(first_conv + 1, bottom, width)
        bottom = add_max_pool(stage, bottom)

    # Classifier head.
    ip1 = add_inner_product('ip1', bottom, 256)
    add_dropout('drop1', ip1)
    ip2 = add_inner_product('ip2', ip1, 256)
    net.relu9 = layers.ReLU(ip2, in_place=True)
    add_dropout('drop2', ip2)
    ip3 = add_inner_product('ip3', ip2, 2)

    net.loss = layers.SoftmaxWithLoss(ip3, net.label)
    net.accuracy = layers.Accuracy(
        ip3,
        net.label,
        include=dict(phase=caffe.TEST)
    )
    return net.to_proto()
# Render the network definition as text and store it as train.prototxt in the
# current working directory.
with open('train.prototxt', 'w') as prototxt_file:
    network_proto = write_layer()
    prototxt_file.write(str(network_proto))
生成的train.prototxt的内容:
layer {
name: "catdog"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00392156885937
}
data_param {
source: "/home/xuy/\346\241\214\351\235\242/code/python/caffe_code/kaggle_dogcat_classification/img_train.lmdb"
batch_size: 16
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 32
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "conv2"
type: "Convolution"
bottom: "conv1"
top: "conv2"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 32
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool1"
top: "conv3"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv4"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5"
type: "Convolution"
bottom: "pool2"
top: "conv5"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "conv6"
type: "Convolution"
bottom: "conv5"
top: "conv6"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "conv6"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv6"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv7"
type: "Convolution"
bottom: "pool3"
top: "conv7"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "conv7"
top: "conv7"
}
layer {
name: "conv8"
type: "Convolution"
bottom: "conv7"
top: "conv8"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu8"
type: "ReLU"
bottom: "conv8"
top: "conv8"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv8"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool4"
top: "ip1"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 256
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "drop1"
type: "Dropout"
bottom: "ip1"
top: "ip1"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 256
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu9"
type: "ReLU"
bottom: "ip2"
top: "ip2"
}
layer {
name: "drop2"
type: "Dropout"
bottom: "ip2"
top: "ip2"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "ip3"
type: "InnerProduct"
bottom: "ip2"
top: "ip3"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip3"
bottom: "label"
top: "loss"
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip3"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
最后附上solver.prototxt的定义:
net: "你的路径"  # placeholder: replace with the path to your network prototxt
test_iter: 50  # the test net's batch_size is 16, so each test pass reads 50*16=800 images
test_interval: 500  # run a test pass after every 500 training iterations
base_lr: 0.0001  # tip: the 2018 fast.ai course videos show find_lr(), which helps pick a good learning rate
momentum: 0.9
weight_decay: 0.0005
lr_policy: "fixed"
gamma: 0.0001  # NOTE(review): presumably unused while lr_policy is "fixed" - confirm
power: 0.75  # NOTE(review): presumably unused while lr_policy is "fixed" - confirm
display: 100  # print progress every 100 iterations
max_iter: 100000
snapshot: 5000
snapshot_prefix: "example/catdog/catdog"  # string values must be quoted in protobuf text format
solver_mode: GPU