主要参考:《深度学习——caffe之经典模型详解与实战》

kaggle数据集下载链接:https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data


数据集的说明:

一共有两个文件:

train.zip:训练集,图片文件命名格式:cat.X.jpg,dog.X.jpg,这个是一个二分类问题,需要根据cat/dog的类别进行0/1分类,即将cat->0,dog->1

test.zip:用来进行检测,命名格式:X.jpg,用来验证算法的识别准确率


step1:将cat_dog进行分类:

根据cat/dog的类别进行0/1分类,即将cat->0,dog->1


# -*- coding:utf-8 -*- 
__author__ = 'xuy'

import commands
import os
import re
import random



my_project='/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/'
train_root_dir='dataset/train'
test_root_dir='dataset/test'

train_save_name='mytrain.txt'

# Maps the first three characters of an image file name to its class label.
LABEL_BY_PREFIX = {'cat': 0, 'dog': 1}


def write_train_list(root_dir, save_name):
    """Write one "<path> <label>" line per cat/dog image found under root_dir.

    Args:
        root_dir: directory that is walked recursively for image files.
        save_name: path of the listing file to create (overwritten if present).

    Returns:
        The number of lines written.

    Files whose name starts with neither "cat" nor "dog" are skipped, so a
    stray file can no longer raise a NameError or duplicate the previous line.
    """
    written = 0
    # The context manager closes the listing even if the walk fails midway.
    with open(save_name, 'w') as listing:
        for parent, _dirnames, filenames in os.walk(root_dir):
            for filename in filenames:
                label = LABEL_BY_PREFIX.get(filename[:3])
                if label is None:
                    continue
                # The recorded path keeps the root_dir prefix (parent + name).
                image_path = os.path.join(parent, filename)
                listing.write('%s %d\n' % (image_path, label))
                written += 1
    return written


write_train_list(train_root_dir, train_save_name)

step2:根据第一步生成的txt文件生成lmdb文件


# -*- coding: UTF-8 -*-
__author__ = 'xuy'


import commands
import os
import re
import caffe

# Root of the local Caffe checkout; the convert_imageset tool is looked up below it.
caffe_root = '/home/xuy/caffe/'

# Project directory. The list file mytrain.txt already stores each image path
# with its dataset/train prefix, so paths in it are relative to this directory.
my_project_path = '/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/'

# Directory holding the training images.
images_train_path = os.path.join(my_project_path, 'dataset/train/')

# "<path> <label>" listing of the training images.
txt_train_path = os.path.join(my_project_path, 'mytrain.txt')




def create_lmdb(caffe_root, images_path, txt_save_path,lmdb_save_path):
    """Convert an image listing into an LMDB with Caffe's convert_imageset tool.

    Args:
        caffe_root: Caffe checkout directory (with trailing slash); the tool is
            expected at <caffe_root>build/tools/convert_imageset.
        images_path: root folder that is prefixed to every path in the listing.
        txt_save_path: listing file with one "<path> <label>" entry per line.
        lmdb_save_path: destination of the generated LMDB.

    Returns:
        The tool's exit status (0 on success), or 127 if it could not be started.

    Images are shuffled and resized to 32x32 by the tool. The arguments are
    passed as a list without a shell, so paths containing spaces or shell
    metacharacters are handed over unchanged.
    """
    import subprocess

    convert_imageset_path = caffe_root + 'build/tools/convert_imageset'
    argv = [
        convert_imageset_path,
        '--shuffle',
        '--resize_height=32',
        '--resize_width=32',
        images_path,
        txt_save_path,
        lmdb_save_path,
    ]
    try:
        process = subprocess.Popen(argv, stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
    except OSError as err:
        # Missing or non-executable tool: report it instead of crashing.
        print("cannot run %s: %s" % (convert_imageset_path, err))
        return 127

    output = process.communicate()[0]
    status = process.returncode
    # Python 3 hands back bytes; decode so the log prints as text.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    if output.endswith('\n'):
        output = output[:-1]
    print(output)
    if status == 0:
        print("lmbd文件生成成功")
    else:
        print("convert_imageset failed with exit status %d" % status)
    return status


# Destination of the training LMDB, inside the project directory.
lmdb_train_path = '%simg_train.lmdb' % my_project_path

# The listing entries already carry the dataset/train prefix, so the image root
# passed as the second argument is just the project directory.
create_lmdb(
    caffe_root,
    my_project_path,
    txt_train_path,
    lmdb_train_path,
)



step3:制作val_dataset,将trainset当中的20%当作valset,也就是随机在trainset当中筛选20%的图片,复制到valset文件夹当中

# -*- coding:utf-8 -*- 
__author__ = 'xuy'

import os
import shutil
import random

root_dir = '/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/dataset/my_train'
output_dir = '/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/dataset/val'
ref = 1


def copy_validation_split(src_root, dst_dir, ratio=0.2, min_entries=ref):
    """Copy a random share of the files under src_root into dst_dir.

    Args:
        src_root: directory walked recursively for source files.
        dst_dir: flat destination directory; created on the first copy.
        ratio: fraction of a directory's entries to pick (default 20%).
        min_entries: directories with at most this many entries are copied
            in full instead of being sampled.

    Returns:
        The list of source paths that were copied.

    Files are drawn without replacement, so the requested share is made of
    distinct files. Copies are flat: equally named files from different
    sub-directories overwrite each other in dst_dir.
    """
    copied = []
    dst_abs = os.path.abspath(dst_dir)
    for root, dirs, files in os.walk(src_root):
        # Never descend into the destination when it lives below src_root.
        dirs[:] = [d for d in dirs
                   if os.path.abspath(os.path.join(root, d)) != dst_abs]
        entry_count = len(os.listdir(root))  # listed once per directory
        candidates = [name for name in files
                      if os.path.isfile(root + '/' + name)]
        if entry_count > min_entries:
            quota = min(int(round(ratio * entry_count)), len(candidates))
            chosen = random.sample(candidates, quota)
        else:
            chosen = candidates
        for name in chosen:
            # Without an existing directory shutil.copy would write every
            # image onto one file named like dst_dir.
            if not os.path.isdir(dst_dir):
                os.makedirs(dst_dir)
            file_to_copy = root + '/' + name
            shutil.copy(file_to_copy, dst_dir)
            print(file_to_copy)
            copied.append(file_to_copy)
    return copied


copy_validation_split(root_dir, output_dir)
print('Finished !')



step4:构造神经网络:生成train.prototxt,套路和上一篇博客一样,基于pycaffe搭建神经网络


# -*- coding:utf-8 -*- 
__author__ = 'xuy'

import caffe


# Learning-rate multipliers applied to every learnable layer of the network.
# Despite the "frozen" names both multipliers are non-zero.
frozen_weight_param = {'lr_mult': 1}  # weights
frozen_bias_param = {'lr_mult': 2}  # biases
froozen_param = [frozen_weight_param, frozen_bias_param]
def _conv_relu(net, index, bottom, num_output):
    """Add conv<index> (3x3, pad 1, stride 1) and an in-place relu<index>; return the conv top."""
    conv = caffe.layers.Convolution(
        bottom,
        param=froozen_param,  # [weight, bias] lr_mult settings
        num_output=num_output,
        pad=1,
        kernel_size=3,
        stride=1,
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant'),
    )
    setattr(net, 'conv%d' % index, conv)
    setattr(net, 'relu%d' % index, caffe.layers.ReLU(conv, in_place=True))
    return conv


def _max_pool(net, index, bottom):
    """Add pool<index>, a 2x2 max pooling with stride 2; return its top."""
    pool = caffe.layers.Pooling(
        bottom,
        pool=caffe.params.Pooling.MAX,
        kernel_size=2,
        stride=2,
    )
    setattr(net, 'pool%d' % index, pool)
    return pool


def _inner_product(net, name, bottom, num_output):
    """Add a fully connected layer called `name`; return its top."""
    fc = caffe.layers.InnerProduct(
        bottom,
        param=froozen_param,
        num_output=num_output,
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant'),
    )
    setattr(net, name, fc)
    return fc


def _dropout(net, name, bottom):
    """Add an in-place dropout layer (ratio 0.5) called `name` on `bottom`."""
    setattr(net, name, caffe.layers.Dropout(
        bottom,
        in_place=True,
        dropout_param=dict(dropout_ratio=0.5),
    ))


def write_layer():
    """Describe the cat/dog classifier with NetSpec and return its proto.

    Layout: LMDB data layer -> four stages of (conv+ReLU, conv+ReLU, max pool)
    with 32, 64, 128 and 256 output channels -> ip1 -> dropout -> ip2 -> ReLU
    -> dropout -> ip3 (2 outputs) -> softmax loss, plus a TEST-phase accuracy.

    Layers are registered in the same order and with the same parameters as
    when written out one by one, so the generated prototxt is meant to be
    unchanged.

    NOTE(review): only a TRAIN-phase data layer is defined here, and ip1 is
    followed by dropout without a ReLU -- confirm both are intended.
    """
    net = caffe.NetSpec()
    net.data, net.label = caffe.layers.Data(
        name='catdog',
        include=dict(phase=caffe.TRAIN),
        ntop=2,
        source='/home/xuy/桌面/code/python/caffe_code/kaggle_dogcat_classification/img_train.lmdb',
        backend=caffe.params.Data.LMDB,
        batch_size=16,
        transform_param=dict(
            scale=1 / 255.
        )
    )

    # Convolutional trunk: stage s holds conv(2s-1), conv(2s) and pool(s).
    bottom = net.data
    for stage, channels in enumerate((32, 64, 128, 256), 1):
        first = 2 * stage - 1
        bottom = _conv_relu(net, first, bottom, channels)
        bottom = _conv_relu(net, first + 1, bottom, channels)
        bottom = _max_pool(net, stage, bottom)

    # Classifier head.
    ip1 = _inner_product(net, 'ip1', bottom, 256)
    _dropout(net, 'drop1', ip1)
    ip2 = _inner_product(net, 'ip2', ip1, 256)
    net.relu9 = caffe.layers.ReLU(ip2, in_place=True)
    _dropout(net, 'drop2', ip2)
    ip3 = _inner_product(net, 'ip3', ip2, 2)

    net.loss = caffe.layers.SoftmaxWithLoss(ip3, net.label)
    net.accuracy = caffe.layers.Accuracy(
        ip3,
        net.label,
        include=dict(phase=caffe.TEST)
    )

    return net.to_proto()

# Serialize the network definition into train.prototxt in the working directory.
with open('train.prototxt', 'w') as proto_file:
    net_definition = write_layer()
    proto_file.write(str(net_definition))



生成的train.prototxt的内容:


layer {
  name: "catdog"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00392156885937
  }
  data_param {
    source: "/home/xuy/\346\241\214\351\235\242/code/python/caffe_code/kaggle_dogcat_classification/img_train.lmdb"
    batch_size: 16
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 32
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "conv1"
  top: "conv2"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 32
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool1"
  top: "conv3"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv4"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "pool2"
  top: "conv5"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "conv6"
  type: "Convolution"
  bottom: "conv5"
  top: "conv6"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "conv6"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv6"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv7"
  type: "Convolution"
  bottom: "pool3"
  top: "conv7"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "conv7"
  top: "conv7"
}
layer {
  name: "conv8"
  type: "Convolution"
  bottom: "conv7"
  top: "conv8"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu8"
  type: "ReLU"
  bottom: "conv8"
  top: "conv8"
}
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv8"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool4"
  top: "ip1"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  inner_product_param {
    num_output: 256
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "drop1"
  type: "Dropout"
  bottom: "ip1"
  top: "ip1"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  inner_product_param {
    num_output: 256
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu9"
  type: "ReLU"
  bottom: "ip2"
  top: "ip2"
}
layer {
  name: "drop2"
  type: "Dropout"
  bottom: "ip2"
  top: "ip2"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "ip3"
  type: "InnerProduct"
  bottom: "ip2"
  top: "ip3"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip3"
  bottom: "label"
  top: "loss"
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip3"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}



最后附上solver.prototxt的定义:

net: "你的路径"
test_iter: 50  # batches per test pass; with a test batch_size of 16 that is 50*16=800 images
test_interval: 500  # run a test pass every 500 training iterations
base_lr: 0.0001  # the 2018 fast.ai lectures show find_lr(), which can locate a good learning rate

momentum: 0.9
weight_decay: 0.0005
lr_policy: "fixed"
# NOTE(review): gamma and power look unused with the "fixed" policy -- confirm before relying on them
gamma: 0.0001
power: 0.75
display: 100  # log every 100 iterations
max_iter: 100000
snapshot: 5000
snapshot_prefix: "example/catdog/catdog"  # string fields must be quoted
solver_mode: GPU