1. 蝴蝶识别分类任务概述
2. 创建项目和挂载数据
3. 初探蝴蝶数据集
第一步,把当前路径切换到data目录,可以使用命令!cd data。在AI Studio Notebook中可以使用Linux命令,只需在命令的最前面加上英文的感叹号(!)。用&&可以连接两个命令,用反斜杠(\)可以把一条命令换行书写。需要注意的是,每次重新打开该项目,data文件夹下除了挂载的数据集,其他文件都会被清空。因此,如果把数据保存在data目录中,每次重新启动项目时,都需要重新解压一次。如果想省事并持久化保存,可以把数据保存在work目录下。
!cd data &&\
unzip -qo data63004/Butterfly20_test.zip &&\
unzip -qo data63004/Butterfly20.zip &&\
rm -r __MACOSX
我们绘制data/Butterfly20/001.Atrophaneura_horishanus文件夹下的图片006.jpg。根据百度百科,Atrophaneura horishanus是凤蝶科、曙凤蝶属的一个物种。
我们再绘制data/Butterfly20/002.Atrophaneura_varuna文件夹下的图片006.jpg。根据百度百科,Atrophaneura varuna对应的中文名称是“瓦曙凤蝶”,它是凤蝶科、曙凤蝶属的另一个物种。
import matplotlib.pyplot as plt
import PIL.Image as Image
# Show one sample image with matplotlib.
# NOTE(review): `path` is not assigned anywhere in the visible cell — presumably it
# points at the image named in the preceding paragraph; confirm against the notebook.
img = Image.open(path)
plt.imshow(img) # draw the image from its array data
plt.show() # display the figure
<Figure size 640x480 with 1 Axes>
# Show a second sample image with matplotlib.
# NOTE(review): `path` is not reassigned in the visible cell — presumably it should
# point at the second image named in the text above; confirm against the notebook.
img = Image.open(path)
plt.imshow(img) # draw the image from its array data
plt.show() # display the figure
更具挑战的是,即便属于同一物种,不同的蝴蝶图片在角度、明暗、背景、姿态、颜色等方面也存在不小的差别,有的图片里甚至有多只蝴蝶。以下两张蝴蝶图片均出自同一物种Atrophaneura horishanus。
# Show two images that the surrounding text says belong to the same species.
# NOTE(review): `path1` and `path2` are not assigned in the visible cell, and both
# imshow calls run before the single plt.show() with no visible subplot/figure call
# in between — confirm whether layout lines were lost from this cell.
img1 = Image.open(path1)
plt.imshow(img1) # draw the image from its array data
img2 = Image.open(path2)
plt.imshow(img2) # draw the image from its array data
plt.show() # display the figure
4. 准备数据
import os
import random
# Build the sample list from species.txt and the Butterfly20 class folders.
# NOTE(review): this cell looks truncated in this view — `label_list` is appended to
# without a visible initialisation, and the inner directory loop has no visible body.
# Confirm the missing lines against the original notebook before running.
data_list = [] # holds the read path and label of every sample
with open("/home/aistudio/data/species.txt") as f:
for line in f:
a,b = line.strip("\n").split(" ") # each line must split on a single space into exactly two fields
label_list.append([b, int(a)-1]) # pair the second field with the first field as an int, minus one
label_dic = dict(label_list)
class_list = os.listdir("/home/aistudio/data/Butterfly20")
class_list.remove('.DS_Store') # drop the .DS_Store entry, which holds no samples; list.remove raises ValueError if it is absent
for each in class_list:
for f in os.listdir("/home/aistudio/data/Butterfly20/"+each):
# Print the first ten entries; each element of data_list is [sample read path, sample label].
[['/home/aistudio/data/Butterfly20/005.Byasa_polyeuctes/044.jpg', 4], ['/home/aistudio/data/Butterfly20/020.Papilio_hermosanus/063.jpg', 19], ['/home/aistudio/data/Butterfly20/019.Papilio_dialis/013.jpg', 18], ['/home/aistudio/data/Butterfly20/005.Byasa_polyeuctes/145.jpg', 4], ['/home/aistudio/data/Butterfly20/011.Lamproptera_meges/024.jpg', 10], ['/home/aistudio/data/Butterfly20/003.Byasa_alcinous/111.jpg', 2], ['/home/aistudio/data/Butterfly20/017.Papilio_arcturus/064.jpg', 16], ['/home/aistudio/data/Butterfly20/010.Lamproptera_curius/039.jpg', 9], ['/home/aistudio/data/Butterfly20/001.Atrophaneura_horishanus/127.jpg', 0], ['/home/aistudio/data/Butterfly20/005.Byasa_polyeuctes/115.jpg', 4]]
import paddle
from paddle.vision.transforms import Compose, ColorJitter, Resize,Transpose, Normalize, RandomHorizontalFlip, RandomRotation
import cv2
import numpy as np
from PIL import Image
from paddle.io import Dataset
def preprocess(img, is_val):
    """Resize, normalise and transpose one image for the network.

    Args:
        img: Image accepted by the paddle.vision transforms; the callers in
            this file pass an RGB PIL image.
        is_val: True for validation/inference, False for training. Both
            modes currently apply the same deterministic steps because the
            training-time colour augmentation is disabled.

    Returns:
        The transformed image cast to float32. After ``Transpose`` the
        layout is CHW, i.e. (3, 224, 224) for an RGB input.
    """
    if is_val:
        transform = Compose([
            Resize(size=(224, 224)),  # scale height and width to 224x224
            # (x - 127.5) / 127.5 per channel on the HWC array
            Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], data_format='HWC'),
            Transpose(),  # HWC -> CHW
        ])
    else:
        # Training pipeline. ColorJitter(0.4, 0.4, 0.4, 0.4) was left disabled
        # by the author, so no random augmentation is applied here.
        transform = Compose([
            Resize(size=(224, 224)),  # scale height and width to 224x224
            # (x - 127.5) / 127.5 per channel on the HWC array
            Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], data_format='HWC'),
            Transpose(),  # HWC -> CHW
        ])
    img = transform(img).astype("float32")
    return img
class Reader(Dataset):
    """Dataset over ``[image_path, label]`` samples with a fixed 80/20 split.

    The last ``int(len(data) * 0.2)`` samples form the validation part and
    everything before them forms the training part.
    """

    def __init__(self, data, is_val=False):
        """Select the training or validation slice of ``data``.

        Args:
            data: Sequence of ``[image_path, label]`` entries.
            is_val: If True keep the validation slice, otherwise the
                training slice.
        """
        super().__init__()
        self.is_val = is_val
        # Split on an explicit index instead of negative slices: when
        # int(len(data) * 0.2) is 0 (fewer than five samples), data[-0:] is the
        # whole list and data[:-0] is empty, which would hand every sample to
        # validation and none to training.
        split = len(data) - int(len(data) * 0.2)
        self.samples = data[split:] if self.is_val else data[:split]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded, forced to RGB and passed through ``preprocess``;
        the label is wrapped in a one-element int64 array.
        """
        img_path, label = self.samples[idx][0], self.samples[idx][1]
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # Basic preprocessing only; no data augmentation is applied.
        img = preprocess(img, self.is_val)
        label = np.array([label], dtype="int64")
        return img, label

    def __len__(self):
        """Return the number of samples in the selected slice."""
        return len(self.samples)
train_dataset = Reader(data_list, is_val=False)
eval_dataset = Reader(data_list, is_val=True)
(3, 224, 224)
5. 建立模型
class MyNet(paddle.nn.Layer):
    """ResNet-101 classifier with a 20-class output head."""

    def __init__(self):
        # The base Layer must be initialised before any sublayer is assigned.
        super().__init__()
        # num_classes=20 replaces the 1000-way ImageNet head; the training log
        # shows the pretrained fc.weight / fc.bias being skipped on load
        # because of the shape mismatch, so only the backbone is pretrained.
        self.layer = paddle.vision.models.resnet101(num_classes=20, pretrained=True)

    def forward(self, x):
        """Run the backbone on ``x`` and return its output."""
        return self.layer(x)
6. 应用高阶API训练模型
二是实例化模型。如果要用高阶API,需要用paddle.Model()对模型进行封装,如model = paddle.Model(model,inputs=input_define,labels=label_define)。
# Training configuration and launch through the paddle.Model high-level API.
total_images = len(train_dataset) # not referenced again in the visible code
batch_size = 64
EPOCHS = 100
# Input specs: image batches shaped [-1, 3, 224, 224] (float32) and labels shaped [-1, 1] (int64).
input_define = paddle.static.InputSpec(shape=[-1,3,224,224], dtype="float32", name="img")
label_define = paddle.static.InputSpec(shape=[-1,1], dtype="int64", name="label")
model = MyNet()
model = paddle.Model(model,inputs=input_define,labels=label_define) # wrap the network with paddle.Model()
optimizer = paddle.optimizer.Adam(learning_rate=0.00005, parameters=model.parameters(), weight_decay=5e-4)
model.prepare(optimizer=optimizer, # optimizer to use
loss=paddle.nn.CrossEntropyLoss(), # loss function
metrics=paddle.metric.Accuracy()) # evaluation metric
visualdl = paddle.callbacks.VisualDL(log_dir='visualdl_log')
# NOTE(review): the EarlyStopping(...) call below is left open and its arguments are
# not visible in this view; model.fit(...) likewise has no visible closing
# parenthesis. Confirm both against the original notebook.
early_stop = paddle.callbacks.EarlyStopping(
model.fit(train_data=train_dataset, # training dataset
eval_data=eval_dataset, # held-out dataset evaluated during training
batch_size=batch_size, # number of samples per batch
epochs=EPOCHS, # number of training epochs
save_dir="/home/aistudio/res101", # custom folder for saved model and optimizer parameters
save_freq=10, # save model and optimizer parameters every this many epochs
callbacks=[visualdl, early_stop]
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/layers.py:1263: UserWarning: Skip loading for fc.weight. fc.weight receives a shape [2048, 1000], but the expected shape is [2048, 20].
warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/layers.py:1263: UserWarning: Skip loading for fc.bias. fc.bias receives a shape [1000], but the expected shape is [20].
warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
The loss value printed in the log is the current step, and the metric is the average value of previous step.
Epoch 1/100
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:77: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working
return (isinstance(seq, collections.Sequence) and
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/nn/layer/norm.py:636: UserWarning: When training, we now always track global mean and variance.
"When training, we now always track global mean and variance.")
step 24/24 [==============================] - loss: 1.2169 - acc: 0.3691 - 525ms/step
save checkpoint at /home/aistudio/res101/0
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 1.3712 - acc: 0.6702 - 515ms/step
Eval samples: 373
Epoch 2/100
step 24/24 [==============================] - loss: 0.5580 - acc: 0.7736 - 503ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.8005 - acc: 0.8257 - 516ms/step
Eval samples: 373
Epoch 3/100
step 24/24 [==============================] - loss: 0.4082 - acc: 0.8868 - 504ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.6704 - acc: 0.8579 - 489ms/step
Eval samples: 373
Epoch 4/100
step 24/24 [==============================] - loss: 0.3082 - acc: 0.9196 - 543ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.6108 - acc: 0.8579 - 473ms/step
Eval samples: 373
Epoch 5/100
step 24/24 [==============================] - loss: 0.3136 - acc: 0.9431 - 526ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.5348 - acc: 0.8794 - 506ms/step
Eval samples: 373
Epoch 6/100
step 24/24 [==============================] - loss: 0.1325 - acc: 0.9692 - 526ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.4887 - acc: 0.9008 - 477ms/step
Eval samples: 373
Epoch 7/100
step 24/24 [==============================] - loss: 0.0646 - acc: 0.9725 - 524ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.4627 - acc: 0.9088 - 519ms/step
Eval samples: 373
Epoch 8/100
step 24/24 [==============================] - loss: 0.3438 - acc: 0.9739 - 505ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.4580 - acc: 0.9088 - 535ms/step
Eval samples: 373
Epoch 9/100
step 24/24 [==============================] - loss: 0.1872 - acc: 0.9833 - 533ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.4294 - acc: 0.9223 - 520ms/step
Eval samples: 373
Epoch 10/100
step 24/24 [==============================] - loss: 0.1158 - acc: 0.9853 - 517ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.5582 - acc: 0.8820 - 520ms/step
Eval samples: 373
Epoch 11/100
step 24/24 [==============================] - loss: 0.1884 - acc: 0.9833 - 514ms/step
save checkpoint at /home/aistudio/res101/10
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.4652 - acc: 0.8981 - 514ms/step
Eval samples: 373
Epoch 12/100
step 24/24 [==============================] - loss: 0.1016 - acc: 0.9853 - 558ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.5159 - acc: 0.8928 - 505ms/step
Eval samples: 373
Epoch 13/100
step 24/24 [==============================] - loss: 0.0344 - acc: 0.9846 - 554ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.5684 - acc: 0.9142 - 526ms/step
Eval samples: 373
Epoch 14/100
step 24/24 [==============================] - loss: 0.0403 - acc: 0.9926 - 553ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.5465 - acc: 0.8954 - 521ms/step
Eval samples: 373
Epoch 15/100
step 24/24 [==============================] - loss: 0.0875 - acc: 0.9879 - 537ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.6833 - acc: 0.9115 - 526ms/step
Eval samples: 373
Epoch 16/100
step 24/24 [==============================] - loss: 0.0885 - acc: 0.9893 - 550ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.6319 - acc: 0.9249 - 529ms/step
Eval samples: 373
Epoch 17/100
step 24/24 [==============================] - loss: 0.0497 - acc: 0.9926 - 546ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.6873 - acc: 0.9196 - 495ms/step
Eval samples: 373
Epoch 18/100
step 24/24 [==============================] - loss: 0.0496 - acc: 0.9906 - 521ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.7625 - acc: 0.9062 - 509ms/step
Eval samples: 373
Epoch 19/100
step 24/24 [==============================] - loss: 0.0661 - acc: 0.9933 - 512ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.7611 - acc: 0.9062 - 473ms/step
Eval samples: 373
Epoch 20/100
step 24/24 [==============================] - loss: 0.0106 - acc: 0.9900 - 513ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.4422 - acc: 0.9035 - 523ms/step
Eval samples: 373
Epoch 21/100
step 24/24 [==============================] - loss: 0.0104 - acc: 0.9893 - 524ms/step
save checkpoint at /home/aistudio/res101/20
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.7928 - acc: 0.8767 - 459ms/step
Eval samples: 373
Epoch 22/100
step 24/24 [==============================] - loss: 0.0104 - acc: 0.9926 - 503ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.6914 - acc: 0.8981 - 466ms/step
Eval samples: 373
Epoch 23/100
step 24/24 [==============================] - loss: 0.0236 - acc: 0.9933 - 500ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.6120 - acc: 0.9062 - 506ms/step
Eval samples: 373
Epoch 24/100
step 24/24 [==============================] - loss: 0.0147 - acc: 0.9926 - 524ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.7424 - acc: 0.9035 - 526ms/step
Eval samples: 373
Epoch 25/100
step 24/24 [==============================] - loss: 0.0102 - acc: 0.9920 - 500ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.7092 - acc: 0.8954 - 518ms/step
Eval samples: 373
Epoch 26/100
step 24/24 [==============================] - loss: 0.0277 - acc: 0.9946 - 518ms/step
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 6/6 [==============================] - loss: 0.7330 - acc: 0.9062 - 480ms/step
Eval samples: 373
Epoch 26: Early stopping.
Best checkpoint has been saved at /home/aistudio/res101/best_model
save checkpoint at /home/aistudio/res101/final
result = model.evaluate(eval_dataset, verbose=1)
Eval begin...
The loss value printed in the log is the current batch, and the metric is the average value of previous step.
step 373/373 [==============================] - loss: 2.9440e-04 - acc: 0.9249 - 60ms/step
Eval samples: 373
{'loss': [0.0002944036], 'acc': 0.9249329758713136}
7. 应用已经训练好的模型进行预测
class InferDataset(Dataset):
    """Dataset that wraps a single image path for inference."""

    def __init__(self, img_path=None):
        """Store the image to predict.

        Args:
            img_path: Path of the single image to run inference on.

        Raises:
            Exception: If ``img_path`` is None or empty.
        """
        super().__init__()
        # Guard clause: only a missing path is an error.
        if not img_path:
            raise Exception("请指定需要预测对应图片路径")
        self.img_paths = [img_path]

    def __getitem__(self, index):
        """Return the preprocessed image at ``index``."""
        img_path = self.img_paths[index]
        # Read with Pillow and force three channels before preprocessing.
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # Use the validation-mode pipeline (basic preprocessing, no augmentation).
        img = preprocess(img, True)
        return img

    def __len__(self):
        """Return the number of wrapped image paths."""
        return len(self.img_paths)
# Wrap a fresh MyNet for inference and read the list of test image paths.
# NOTE(review): no checkpoint-loading call and no body for the loop below are visible
# in this view — presumably trained weights are loaded and each line is collected
# into `infer_list`; confirm against the original notebook.
model = paddle.Model(MyNet(),inputs=input_define)
with open("/home/aistudio/data/testpath.txt") as file_pred:
for line in file_pred:
def get_label_dict2(species_path="/home/aistudio/data/species.txt"):
    """Load the class-index-to-species-name mapping used to decode predictions.

    Each non-blank line of the file holds a class number and a species name
    separated by whitespace.

    Args:
        species_path: Path of the species list. Defaults to the location the
            notebook originally hard-coded.

    Returns:
        dict mapping ``class number - 1`` (int) to the species name (str).

    Raises:
        ValueError: If a non-blank line does not have exactly two fields or
            its first field is not an integer.
    """
    label_dic2 = {}
    with open(species_path, encoding="utf-8") as species_file:
        for line in species_file:
            # Tolerate blank or trailing empty lines instead of failing the unpack.
            if not line.strip():
                continue
            number, name = line.split()
            label_dic2[int(number) - 1] = name
    return label_dic2
# Predict a species name for every path in infer_list and write the names out.
# NOTE(review): `infer_list` and `results` are not initialised in the visible code,
# and the final write loop has no visible body — confirm against the original notebook.
label_dict2 = get_label_dict2()
for infer_path in infer_list:
infer_data = InferDataset(infer_path)
result = model.predict(test_data=infer_data)[0] # key step: run prediction and take the first element of what predict returns
result = paddle.to_tensor(result)
result = np.argmax(result.numpy()) # index of the largest value
results.append("{}".format(label_dict2[result])) # look up the label name for that index
with open("work/result.txt", "w") as f:
for r in results:
