The annotated version of the YOLOv5 code has been updated: it now covers the recent 2021.07.14 release, and the annotations are more complete.
github: https:///Laughing-q/yolov5_annotations
Annotations and Analysis of the YOLOv5 Test Code test.py
- Test arguments and main-function analysis
- test-function analysis
This article analyzes test.py, the test code of ultralytics\yolov5. Since yolov5 is still under active development, minor bug fixes and feature updates land all the time, but the overall structure rarely changes much, so the annotations and analysis below remain applicable; if a major rewrite does happen, I will update the annotations accordingly.
Test arguments and main-function analysis
if __name__ == '__main__':
"""
opt argument reference
weights: path(s) of the model weights to test
data: dataset config file (dataset paths, class names, etc.)
batch-size: batch size for the forward pass, default 32
img-size: input image resolution, default 640
conf-thres: confidence threshold used to filter boxes, default 0.001
iou-thres: IoU threshold used during NMS, default 0.65
save-json: whether to save the predictions in COCO JSON format and evaluate them with the COCO API (requires labels in the same COCO JSON format), default False
task: which mode to run, default 'val'; see the code comments below for details
device: device to test on; 'cpu', '0' (a single GPU, i.e. cuda:0), or '0,1,2,3' (multiple GPUs)
single-cls: whether to treat the dataset as single-class, default False
augment: whether to use TTA (test-time augmentation) at test time, default False
merge: whether to merge boxes during NMS (Merge NMS), default False
verbose: whether to print the mAP of every class, default False
save-txt: whether to save the predicted box coordinates to txt files, default False
"""
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--weights', nargs='+', type=str, default='runs/exp0/weights/last.pt', help='model.pt path(s)')
parser.add_argument('--data', type=str, default='data/mask.yaml', help='*.data path')
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.60, help='IOU threshold for NMS')
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--merge', action='store_true', help='use Merge NMS')
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
opt = parser.parse_args()
# set save_json (forced on when the dataset config ends with coco.yaml)
opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
# check_file checks that the given file exists
opt.data = check_file(opt.data) # check file
print(opt)
# task is one of 'val', 'test', 'study'
# for task in ['val', 'test'], the validation or test set is simply evaluated
if opt.task in ['val', 'test']: # (default) run normally
test(opt.data,
opt.weights,
opt.batch_size,
opt.img_size,
opt.conf_thres,
opt.iou_thres,
opt.save_json,
opt.single_cls,
opt.augment,
opt.verbose)
# task == 'study': evaluate the yolov5 family and yolov3-spp at a range of image sizes, then save and plot the metrics
elif opt.task == 'study': # run over a range of settings and save/plot
for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:  # yolov5 family plus yolov3-spp, as described above
f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to
x = list(range(352, 832, 64)) # x axis
y = [] # y axis
for i in x: # img-size
print('\nRunning %s point %s...' % (f, i))
r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
plot_study_txt(f, x) # plot
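For reference, a typical command for running this script directly might look like the line below; the weights and data paths are just the parser defaults shown above, so substitute your own:
python test.py --weights runs/exp0/weights/last.pt --data data/mask.yaml --img-size 416 --batch-size 32 --conf-thres 0.001 --iou-thres 0.6 --verbose --save-txt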
test-function analysis
import argparse
import json
from models.experimental import *
from utils.datasets import *
def test(data,
weights=None,
batch_size=16,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6, # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
model=None,
dataloader=None,
save_dir='',
merge=False,
save_txt=False):
"""
:param data:
:param weights:
:param batch_size:
:param imgsz:
:param conf_thres:
:param iou_thres:
:param save_json:
:param single_cls:
:param augment:
:param verbose:
:param model: the model to test; passed in when test() is called from train.py during training
:param dataloader: dataloader of the test/validation set; passed in when test() is called from train.py
:param save_dir: directory where the first batch of test images, drawn with ground-truth and predicted boxes, is saved
:param merge:
:param save_txt:
:return:
"""
# Initialize/load model and set device
# determine whether test() was called during training (from train.py); if so, reuse the training device
training = model is not None
if training: # called by train.py
device = next(model.parameters()).device # get model device
else: # called directly
# select the device
device = torch_utils.select_device(opt.device, batch_size=batch_size)
merge, save_txt = opt.merge, opt.save_txt # use Merge NMS, save *.txt labels
if save_txt:
out = Path('inference/output')
if os.path.exists(out):
shutil.rmtree(out) # delete output folder
os.makedirs(out) # make new output folder
# Remove previous
# delete the previous test_batch0_gt.jpg and test_batch0_pred.jpg
for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
os.remove(f)
# Load model
# load the FP32 model (attempt_load also handles an ensemble of several weight files)
model = attempt_load(weights, map_location=device) # load FP32 model
# check that the input image size is divisible by the maximum stride (32), adjusting it if necessary
imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
# Multi-GPU disabled, incompatible with .half() https:///ultralytics/yolov5/issues/99
# if device.type != 'cpu' and torch.cuda.device_count() > 1:
# model = nn.DataParallel(model)
# Half
# if the device is not the CPU and exactly one GPU is used, convert the model from Float32 to Float16 to speed up the forward pass
half = device.type != 'cpu' and torch.cuda.device_count() == 1 # half precision only supported on single-GPU
if half:
model.half() # to FP16
# Configure
model.eval()
# load the dataset config
with open(data) as f:
data = yaml.load(f, Loader=yaml.FullLoader) # model dict
nc = 1 if single_cls else int(data['nc']) # number of classes
# IoU thresholds from 0.5 to 0.95 in steps of 0.05
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
# number of IoU thresholds
niou = iouv.numel()
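# For reference, iouv evaluates to tensor([0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95])
# and niou == 10, so every matched prediction later receives a 10-element true/false vector,
# one entry per IoU threshold; this is what mAP@0.5:0.95 is averaged over.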
# Dataloader
if not training:
# run the model once on an all-zeros image to check that the forward pass works (and to warm it up)
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
# get the image paths of the split to evaluate
path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
# create the dataloader
# note that rect=True here: yolov5 evaluates with rectangular inference
dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]
# counter for the number of images tested
seen = 0
# get the class names
names = model.names if hasattr(model, 'names') else model.module.names
"""
Get the COCO class-id mapping.
A note here: the COCO dataset has 80 classes (so the indices should range over 0~79),
yet the category ids in its annotations go up to 90 (the author noticed this by inspecting the json file of the COCO test set; the exact reason is not explained there).
coco80_to_coco91_class() bridges the two, returning an array that maps the 80 contiguous indices to the original COCO category ids.
"""
coco91class = coco80_to_coco91_class()
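# For example, coco80_to_coco91_class()[0] == 1 (person) and the last entry is 90;
# the ids skipped in between (such as 12) are simply never used in the official COCO annotations.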
# header line for the tqdm progress bar
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
# initialize metrics and timers
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
# initialize the validation losses
loss = torch.zeros(3, device=device)
# initialize the json results list, the statistics list, and the AP containers
jdict, stats, ap, ap_class = [], [], [], []
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device)
# convert the images from Float32 to Float16 as well when half precision is enabled
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = img.shape # batch size, channels, height, width
whwh = torch.Tensor([width, height, width, height]).to(device)
# Disable gradients
with torch.no_grad():
# Run model
"""
time_synchronized() calls torch.cuda.synchronize() and then returns time.time();
torch.cuda.synchronize() waits until all work on the GPU has finished,
so, in short, the measured times are more accurate this way.
"""
t = torch_utils.time_synchronized()
# forward pass
# inf_out holds the inference outputs, train_out the raw training outputs
inf_out, train_out = model(img, augment=augment) # inference and training outputs
# t0 accumulates the forward-pass time
t0 += torch_utils.time_synchronized() - t
# Compute loss
# if test() was called during training, use train_out to compute and accumulate the GIoU, obj and cls losses on the validation set
if training: # if model has loss hyperparameters
loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls
# Run NMS
# t1 accumulates the NMS post-processing time
t = torch_utils.time_synchronized()
"""
non_max_suppression performs non-maximum suppression;
conf_thres is the confidence threshold, iou_thres the IoU threshold,
and merge selects whether overlapping boxes are merged (Merge NMS).
"""
output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
t1 += torch_utils.time_synchronized() - t
# Statistics per image
# per-image statistics: write predictions to txt, build the json dict, count true positives, etc.
for si, pred in enumerate(output):
# get the labels of the si-th image: class, x, y, w, h
# targets[:, 0] is the index of the image each label belongs to
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
# get the target classes
tcls = labels[:, 0].tolist() if nl else [] # target class
# count the tested images
seen += 1
# if there are no predictions, append empty entries to stats (only needed when labels exist)
if pred is None:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Append to text file
# save the predictions to a txt file
if save_txt:
# get the width and height of the corresponding original image
gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
# build the txt path from the image name
txt_path = str(out / Path(paths[si]).stem)
# rescale the predicted box coordinates back to the original image size
pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], shapes[si][0],
shapes[si][1]) # to original
for *xyxy, conf, cls in pred:
# xyxy -> xywh, then normalize the coordinates
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
# append the predicted class and coordinates to the txt file
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format
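# A resulting line in the txt file looks like, e.g., "0 0.5 0.5 0.2 0.3":
# the class index followed by the normalized x_center, y_center, width and height.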
# Clip boxes to image bounds
# clip the predicted boxes to the image boundaries
clip_coords(pred, (height, width))
# Append to pycocotools JSON dictionary
# build the COCO-format json results dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
# a COCO results json roughly contains entries like the line above
# get the image id
image_id = int(Path(paths[si]).stem.split('_')[-1])
# get the predicted boxes
box = pred[:, :4].clone() # xyxy
# rescale the boxes to the original image size
scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
# convert to xywh format
box = xyxy2xywh(box) # xywh
"""
Note that xyxy, as mentioned before, means top-left and bottom-right corner coordinates,
while xywh means the center coordinates plus width and height.
The bbox format in a COCO json is also called xywh, but there xy is the top-left corner,
i.e. the COCO json coordinate format is: top-left corner + width and height.
So the next line converts the center coordinates into the top-left corner.
"""
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
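# Worked example: an xywh box with center (200, 150) and size 100x50, i.e. (200, 150, 100, 50),
# becomes the COCO-style bbox (150, 125, 100, 50) after subtracting half the width/height here.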
"""
image_id: id of the image the prediction belongs to
category_id: class id; coco91class maps the contiguous indices 0~79 back to the original COCO ids
bbox: box coordinates
score: confidence
"""
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
# initialize the per-prediction correctness matrix; niou is the number of IoU thresholds
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
# detected stores the indices of targets that have already been matched
detected = [] # target indices
tcls_tensor = labels[:, 0]
# target boxes
# convert the labels to xyxy format and scale them from normalized to pixel coordinates (multiply by whwh)
tbox = xywh2xyxy(labels[:, 1:5]) * whwh
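# e.g. a normalized label (x, y, w, h) = (0.5, 0.5, 0.25, 0.5) on a 640x640 input maps to the
# pixel box (x1, y1, x2, y2) = (240, 160, 400, 480) after xywh2xyxy and the multiplication by whwh.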
# Per target class
# handle each class present in the image separately
for cls in torch.unique(tcls_tensor):
# indices of the ground-truth boxes of this class
ti = (cls == tcls_tensor).nonzero().view(-1) # target indices
# indices of the predicted boxes of this class
pi = (cls == pred[:, 5]).nonzero().view(-1) # prediction indices
# Search for detections
if pi.shape[0]:
# Prediction to target ious
# box_iou computes the IoU between predicted and target boxes; max(1) keeps the best IoU per prediction, and i is the index of the matching target
"""
pred[pi, :4] has shape [N, 4] (N predictions of this class)
tbox[ti] has shape [M, 4] (M targets of this class)
box_iou returns a matrix of shape [N, M]
ious has shape [N] (best IoU for each prediction)
i has shape [N]; its values are indices into tbox[ti], i.e. they lie in 0~M-1
"""
ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
for j in (ious > iouv[0]).nonzero():
# the matched target index
d = ti[i[j]] # detected target
if d not in detected:
# add d to detected
detected.append(d)
# iouv is the sequence from 0.5 to 0.95 with a step of 0.05
# record a true positive at every IoU threshold the match clears
correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
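# e.g. a match with IoU 0.72 yields [True, True, True, True, True, False, False, False, False, False]:
# a true positive at thresholds 0.50-0.70 and a miss at 0.75-0.95.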
if len(detected) == nl: # all targets already located in image
break
# Append statistics (correct, conf, pcls, tcls)
# append this image's results to stats
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Plot images
# draw the ground-truth and predicted boxes on the first batch of images and save them
if batch_i < 1:
f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename
plot_images(img, targets, paths, str(f), names) # ground truth
f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions
# Compute statistics
# concatenate the per-image entries of the stats list
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats):
# compute the metrics from the TP statistics gathered above:
# precision P = TP/(TP+FP), recall R = TP/(TP+FN), AP, F1 score, and the list of classes
p, r, ap, f1, ap_class = ap_per_class(*stats)
p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
# nt is an array holding how many ground-truth boxes the test set has per class
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
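# For reference, np.bincount above simply counts labels per class index, e.g.
# np.bincount([0, 0, 2, 2, 2], minlength=4) == array([2, 0, 3, 0]):
# class 0 has two ground-truth boxes, class 2 has three, classes 1 and 3 have none.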
# Print results
# print the overall results
pf = '%20s' + '%12.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
# Print results per class
# print the metrics of every individual class
if verbose and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
# Print speeds
# print the time spent on the forward pass, on NMS, and in total
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
if not training:
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
# Save JSON
# evaluate with the COCO API (pycocotools) using the json predictions saved above
# note that the test-set labels must also be available in COCO json format
if save_json and map50 and len(jdict):
# get the image ids
imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
# build the path of the predictions json file, then write it
f = 'detections_val2017_%s_results.json' % \
(weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '') # filename
print('\nCOCO mAP with pycocotools... saving %s...' % f)
with open(f, 'w') as file:
json.dump(jdict, file)
try:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
# https:///cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
# load and initialize the ground-truth annotations json of the validation set
cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO ground truth api
# initialize the COCO API for the predictions file
cocoDt = cocoGt.loadRes(f) # initialize COCO pred api
# create the evaluator
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.params.imgIds = imgIds # image IDs to evaluate
# run the evaluation
cocoEval.evaluate()
cocoEval.accumulate()
# display the results
cocoEval.summarize()
map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
except:
print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
'See https:///cocodataset/cocoapi/issues/356')
# Return results
# return the test metrics
model.float() # for training
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
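For context, the sketch below shows roughly how test() gets invoked from train.py during training, which is when the model, dataloader and save_dir parameters documented above are filled in; the variable names ema, testloader, imgsz_test and log_dir are assumptions made for illustration rather than a verbatim copy of train.py:
# evaluate the current EMA model on the validation dataloader built inside train.py (names assumed)
results, maps, times = test(opt.data,
                            batch_size=batch_size,
                            imgsz=imgsz_test,
                            model=ema.ema,          # assumed: EMA copy of the model being trained
                            dataloader=testloader,  # assumed: validation dataloader created in train.py
                            save_dir=log_dir)       # where test_batch0_gt.jpg / test_batch0_pred.jpg are written
# results unpacks as (mp, mr, map50, map, val_giou_loss, val_obj_loss, val_cls_loss), matching the return above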
Above, I have walked through the overall flow of test.py in ultralytics\yolov5 and explained what each part of the code does. Some helper functions, such as non_max_suppression() which performs the NMS step, have not been analyzed in detail yet; they live in utils.py, and a follow-up post will cover utils.py.