The annotated version of the YOLOv5 code has been updated: it now covers the recent 2021.07.14 release, and the annotations are more complete.
github: https:///Laughing-q/yolov5_annotations
Annotations and Analysis of the YOLOv5 Test Code test.py
- Test arguments and main-function analysis
- test-function analysis
This article analyzes test.py, the test code of ultralytics\yolov5. Since yolov5 is still under active development, minor bug fixes and feature updates land all the time, but the overall structure rarely changes much, so the annotations and analysis below remain applicable; if a major rewrite does happen, I will update the annotations accordingly.
Test arguments and main-function analysis
if __name__ == '__main__':
"""
opt argument reference
weights: path(s) of the model weights to test
data: dataset config file (dataset paths, class names, etc.)
batch-size: batch size for the forward pass, default 32
img-size: input image resolution, default 640
conf-thres: confidence threshold used to filter boxes, default 0.001
iou-thres: IoU threshold used during NMS, default 0.65
save-json: whether to save the predictions in COCO JSON format and evaluate them with the COCO API (requires labels in the same COCO JSON format), default False
task: which mode to run, default 'val'; see the code comments below for details
device: device to test on; 'cpu', '0' (a single GPU, i.e. cuda:0), or '0,1,2,3' (multiple GPUs)
single-cls: whether to treat the dataset as single-class, default False
augment: whether to use TTA (test-time augmentation) at test time, default False
merge: whether to merge boxes during NMS (Merge NMS), default False
verbose: whether to print the mAP of every class, default False
save-txt: whether to save the predicted box coordinates to txt files, default False
"""
parser = argparse.ArgumentParser(prog='test.py')
parser.add_argument('--weights', nargs='+', type=str, default='runs/exp0/weights/last.pt', help='model.pt path(s)')
parser.add_argument('--data', type=str, default='data/mask.yaml', help='*.data path')
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.60, help='IOU threshold for NMS')
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--merge', action='store_true', help='use Merge NMS')
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
opt = parser.parse_args()
# set save_json (forced on when the dataset config ends with coco.yaml)
opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
# check_file checks that the given file exists
opt.data = check_file(opt.data) # check file
print(opt)
# task is one of 'val', 'test', 'study'
# for task in ['val', 'test'], the validation or test set is simply evaluated
if opt.task in ['val', 'test']: # (default) run normally
test(opt.data,
opt.weights,
opt.batch_size,
opt.img_size,
opt.conf_thres,
opt.iou_thres,
opt.save_json,
opt.single_cls,
opt.augment,
opt.verbose)
# task == 'study': evaluate the yolov5 family and yolov3-spp at a range of image sizes, then save and plot the metrics
elif opt.task == 'study': # run over a range of settings and save/plot
for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:  # yolov5 family plus yolov3-spp, as described above
f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to
x = list(range(352, 832, 64)) # x axis
y = [] # y axis
for i in x: # img-size
print('\nRunning %s point %s...' % (f, i))
r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
plot_study_txt(f, x) # plot
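For reference, a typical command for running this script directly might look like the line below; the weights and data paths are just the parser defaults shown above, so substitute your own:
python test.py --weights runs/exp0/weights/last.pt --data data/mask.yaml --img-size 416 --batch-size 32 --conf-thres 0.001 --iou-thres 0.6 --verbose --save-txt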
test-function analysis
import argparse
import json
from models.experimental import *
from utils.datasets import *
def test(data,
weights=None,
batch_size=16,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6, # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
model=None,
dataloader=None,
save_dir='',
merge=False,
save_txt=False):
"""
:param data:
:param weights:
:param batch_size:
:param imgsz:
:param conf_thres:
:param iou_thres:
:param save_json:
:param single_cls:
:param augment:
:param verbose:
:param model: the model to test; passed in when test() is called from train.py during training
:param dataloader: dataloader of the test/validation set; passed in when test() is called from train.py
:param save_dir: directory where the first batch of test images, drawn with ground-truth and predicted boxes, is saved
:param merge:
:param save_txt:
:return:
"""
# Initialize/load model and set device
# determine whether test() was called during training (from train.py); if so, reuse the training device
training = model is not None
if training: # called by train.py
device = next(model.parameters()).device # get model device
else: # called directly
# select the device
device = torch_utils.select_device(opt.device, batch_size=batch_size)
merge, save_txt = opt.merge, opt.save_txt # use Merge NMS, save *.txt labels
if save_txt:
out = Path('inference/output')
if os.path.exists(out):
shutil.rmtree(out) # delete output folder
os.makedirs(out) # make new output folder
# Remove previous
# delete the previous test_batch0_gt.jpg and test_batch0_pred.jpg
for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
os.remove(f)
# Load model
# load the FP32 model (attempt_load also handles an ensemble of several weight files)
model = attempt_load(weights, map_location=device) # load FP32 model
# check that the input image size is divisible by the maximum stride (32), adjusting it if necessary
imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
# Multi-GPU disabled, incompatible with .half() https:///ultralytics/yolov5/issues/99
# if device.type != 'cpu' and torch.cuda.device_count() > 1:
# model = nn.DataParallel(model)
# Half
# if the device is not the CPU and exactly one GPU is used, convert the model from Float32 to Float16 to speed up the forward pass
half = device.type != 'cpu' and torch.cuda.device_count() == 1 # half precision only supported on single-GPU
if half:
model.half() # to FP16
# Configure
model.eval()
# load the dataset config
with open(data) as f:
data = yaml.load(f, Loader=yaml.FullLoader) # model dict
nc = 1 if single_cls else int(data['nc']) # number of classes
# IoU thresholds from 0.5 to 0.95 in steps of 0.05
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
# number of IoU thresholds
niou = iouv.numel()
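# For reference, iouv evaluates to tensor([0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95])
# and niou == 10, so every matched prediction later receives a 10-element true/false vector,
# one entry per IoU threshold; this is what mAP@0.5:0.95 is averaged over.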
# Dataloader
if not training:
# run the model once on an all-zeros image to check that the forward pass works (and to warm it up)
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
# get the image paths of the split to evaluate
path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
# create the dataloader
# note that rect=True here: yolov5 evaluates with rectangular inference
dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]
# counter for the number of images tested
seen = 0
# get the class names
names = model.names if hasattr(model, 'names') else model.module.names
"""
Get the COCO class-id mapping.
A note here: the COCO dataset has 80 classes (so the indices should range over 0~79),
yet the category ids in its annotations go up to 90 (the author noticed this by inspecting the json file of the COCO test set; the exact reason is not explained there).
coco80_to_coco91_class() bridges the two, returning an array that maps the 80 contiguous indices to the original COCO category ids.
"""
coco91class = coco80_to_coco91_class()
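# For example, coco80_to_coco91_class()[0] == 1 (person) and the last entry is 90;
# the ids skipped in between (such as 12) are simply never used in the official COCO annotations.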
# header line for the tqdm progress bar
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
# initialize metrics and timers
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
# initialize the validation losses
loss = torch.zeros(3, device=device)
# initialize the json results list, the statistics list, and the AP containers
jdict, stats, ap, ap_class = [], [], [], []
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device)
# convert the images from Float32 to Float16 as well when half precision is enabled
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = img.shape # batch size, channels, height, width
whwh = torch.Tensor([width, height, width, height]).to(device)
# Disable gradients
with torch.no_grad():
# Run model
"""
time_synchronized() calls torch.cuda.synchronize() and then returns time.time();
torch.cuda.synchronize() waits until all work on the GPU has finished,
so, in short, the measured times are more accurate this way.
"""
t = torch_utils.time_synchronized()
# forward pass
# inf_out holds the inference outputs, train_out the raw training outputs
inf_out, train_out = model(img, augment=augment) # inference and training outputs
# t0 accumulates the forward-pass time
t0 += torch_utils.time_synchronized() - t
# Compute loss
# if test() was called during training, use train_out to compute and accumulate the GIoU, obj and cls losses on the validation set
if training: # if model has loss hyperparameters
loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls
# Run NMS
# t1 accumulates the NMS post-processing time
t = torch_utils.time_synchronized()
"""
non_max_suppression performs non-maximum suppression;
conf_thres is the confidence threshold, iou_thres the IoU threshold,
and merge selects whether overlapping boxes are merged (Merge NMS).
"""
output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
t1 += torch_utils.time_synchronized() - t
# Statistics per image
# per-image statistics: write predictions to txt, build the json dict, count true positives, etc.
for si, pred in enumerate(output):
# get the labels of the si-th image: class, x, y, w, h
# targets[:, 0] is the index of the image each label belongs to
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
# get the target classes
tcls = labels[:, 0].tolist() if nl else [] # target class
# count the tested images
seen += 1
# if there are no predictions, append empty entries to stats (only needed when labels exist)
if pred is None:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Append to text file
# save the predictions to a txt file
if save_txt:
# get the width and height of the corresponding original image
gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
# build the txt path from the image name
txt_path = str(out / Path(paths[si]).stem)
# rescale the predicted box coordinates back to the original image size
pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], shapes[si][0],
shapes[si][1]) # to original
for *xyxy, conf, cls in pred:
# xyxy -> xywh, then normalize the coordinates
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
# append the predicted class and coordinates to the txt file
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format
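# A resulting line in the txt file looks like, e.g., "0 0.5 0.5 0.2 0.3":
# the class index followed by the normalized x_center, y_center, width and height.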
# Clip boxes to image bounds
# clip the predicted boxes to the image boundaries
clip_coords(pred, (height, width))
# Append to pycocotools JSON dictionary
# build the COCO-format json results dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
# a COCO results json roughly contains entries like the line above
# get the image id
image_id = int(Path(paths[si]).stem.split('_')[-1])
# get the predicted boxes
box = pred[:, :4].clone() # xyxy
# rescale the boxes to the original image size
scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
# convert to xywh format
box = xyxy2xywh(box) # xywh
"""
Note that xyxy, as mentioned before, means top-left and bottom-right corner coordinates,
while xywh means the center coordinates plus width and height.
The bbox format in a COCO json is also called xywh, but there xy is the top-left corner,
i.e. the COCO json coordinate format is: top-left corner + width and height.
So the next line converts the center coordinates into the top-left corner.
"""
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
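# Worked example: an xywh box with center (200, 150) and size 100x50, i.e. (200, 150, 100, 50),
# becomes the COCO-style bbox (150, 125, 100, 50) after subtracting half the width/height here.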
"""
image_id: id of the image the prediction belongs to
category_id: class id; coco91class maps the contiguous indices 0~79 back to the original COCO ids
bbox: box coordinates
score: confidence
"""
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
# initialize the per-prediction correctness matrix; niou is the number of IoU thresholds
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
# detected stores the indices of targets that have already been matched
detected = [] # target indices
tcls_tensor = labels[:, 0]
# target boxes
# convert the labels to xyxy format and scale them from normalized to pixel coordinates (multiply by whwh)
tbox = xywh2xyxy(labels[:, 1:5]) * whwh
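# e.g. a normalized label (x, y, w, h) = (0.5, 0.5, 0.25, 0.5) on a 640x640 input maps to the
# pixel box (x1, y1, x2, y2) = (240, 160, 400, 480) after xywh2xyxy and the multiplication by whwh.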
# Per target class
# handle each class present in the image separately
for cls in torch.unique(tcls_tensor):
# indices of the ground-truth boxes of this class
ti = (cls == tcls_tensor).nonzero().view(-1) # target indices
# indices of the predicted boxes of this class
pi = (cls == pred[:, 5]).nonzero().view(-1) # prediction indices
# Search for detections
if pi.shape[0]:
# Prediction to target ious
# box_iou computes the IoU between predicted and target boxes; max(1) keeps the best IoU per prediction, and i is the index of the matching target
"""
pred[pi, :4] has shape [N, 4] (N predictions of this class)
tbox[ti] has shape [M, 4] (M targets of this class)
box_iou returns a matrix of shape [N, M]
ious has shape [N] (best IoU for each prediction)
i has shape [N]; its values are indices into tbox[ti], i.e. they lie in 0~M-1
"""
ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
for j in (ious > iouv[0]).nonzero():
# the matched target index
d = ti[i[j]] # detected target
if d not in detected:
# add d to detected
detected.append(d)
# iouv is the sequence from 0.5 to 0.95 with a step of 0.05
# record a true positive at every IoU threshold the match clears
correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
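# e.g. a match with IoU 0.72 yields [True, True, True, True, True, False, False, False, False, False]:
# a true positive at thresholds 0.50-0.70 and a miss at 0.75-0.95.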
if len(detected) == nl: # all targets already located in image
break
# Append statistics (correct, conf, pcls, tcls)
# append this image's results to stats
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Plot images
# draw the ground-truth and predicted boxes on the first batch of images and save them
if batch_i < 1:
f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename
plot_images(img, targets, paths, str(f), names) # ground truth
f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions
# Compute statistics
# concatenate the per-image entries of the stats list
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats):
# compute the metrics from the TP statistics gathered above:
# precision P = TP/(TP+FP), recall R = TP/(TP+FN), AP, F1 score, and the list of classes
p, r, ap, f1, ap_class = ap_per_class(*stats)
p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
# nt is an array holding how many ground-truth boxes the test set has per class
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
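# For reference, np.bincount above simply counts labels per class index, e.g.
# np.bincount([0, 0, 2, 2, 2], minlength=4) == array([2, 0, 3, 0]):
# class 0 has two ground-truth boxes, class 2 has three, classes 1 and 3 have none.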
# Print results
# print the overall results
pf = '%20s' + '%12.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
# Print results per class
# print the metrics of every individual class
if verbose and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
# Print speeds
# print the time spent on the forward pass, on NMS, and in total
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
if not training:
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
# Save JSON
# evaluate with the COCO API (pycocotools) using the json predictions saved above
# note that the test-set labels must also be available in COCO json format
if save_json and map50 and len(jdict):
# get the image ids
imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
# build the path of the predictions json file, then write it
f = 'detections_val2017_%s_results.json' % \
(weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '') # filename
print('\nCOCO mAP with pycocotools... saving %s...' % f)
with open(f, 'w') as file:
json.dump(jdict, file)
try:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
# https:///cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
# load and initialize the ground-truth annotations json of the validation set
cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO ground truth api
# initialize the COCO API for the predictions file
cocoDt = cocoGt.loadRes(f) # initialize COCO pred api
# create the evaluator
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.params.imgIds = imgIds # image IDs to evaluate
# run the evaluation
cocoEval.evaluate()
cocoEval.accumulate()
# display the results
cocoEval.summarize()
map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
except:
print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
'See https:///cocodataset/cocoapi/issues/356')
# Return results
# return the test metrics
model.float() # for training
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
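For context, the sketch below shows roughly how test() gets invoked from train.py during training, which is when the model, dataloader and save_dir parameters documented above are filled in; the variable names ema, testloader, imgsz_test and log_dir are assumptions made for illustration rather than a verbatim copy of train.py:
# evaluate the current EMA model on the validation dataloader built inside train.py (names assumed)
results, maps, times = test(opt.data,
                            batch_size=batch_size,
                            imgsz=imgsz_test,
                            model=ema.ema,          # assumed: EMA copy of the model being trained
                            dataloader=testloader,  # assumed: validation dataloader created in train.py
                            save_dir=log_dir)       # where test_batch0_gt.jpg / test_batch0_pred.jpg are written
# results unpacks as (mp, mr, map50, map, val_giou_loss, val_obj_loss, val_cls_loss), matching the return above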
Above, I have walked through the overall flow of test.py in ultralytics\yolov5 and explained what each part of the code does. Some helper functions, such as non_max_suppression() which performs the NMS step, have not been analyzed in detail yet; they live in utils.py, and a follow-up post will cover utils.py.