YOLOv5 Code Walkthrough (Part 1)
- 1. train.py
- 1.1 Mixed precision training with NVIDIA's apex
- 1.2 Get file paths
- 1.3 Get data paths
- 1.4 Remove previous results
- 1.5 Create the model
- 1.6 Check training and test image sizes
- 1.7 Set optimizer parameters
- 1.8 Load the pretrained model and weights, and write training results to results.txt
- 1.9 Apply mixed precision to training
- 1.10 Set up the cosine scheduler and define the learning-rate decay
- 1.11 Define and initialize distributed training
- 1.12 Load the training and test sets
- 1.13 Model parameters
- 1.14 Class statistics
- 1.15 Check the anchors
- 1.16 Exponential moving average
- 1.17 Start training
- 1.17.1 Get parameters
- 1.17.2 Training begins
- 1.18 Define model file names
- 1.19 Finish training and return results
1. train.py
1.1 Mixed precision training with NVIDIA's apex
mixed_precision = True
try:  # Mixed precision training https://github.com/NVIDIA/apex
    from apex import amp
except:
    print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
    mixed_precision = False  # not installed
1.2 Get file paths
wdir = 'weights' + os.sep # weights dir
os.makedirs(wdir, exist_ok=True)
last = wdir + 'last.pt'
best = wdir + 'best.pt'
results_file = 'results.txt'
1.3 Get data paths
# Configure
init_seeds(1)
with open(opt.data) as f:
    data_dict = yaml.load(f, Loader=yaml.FullLoader)  # model dict
train_path = data_dict['train']
test_path = data_dict['val']
nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes
1.4 Remove previous results
# Remove previous results
for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
    os.remove(f)
1.5 Create the model
# Create model
model = Model(opt.cfg).to(device)
assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
model.names = data_dict['names']
assert is a conditional check: execution only continues past it (and the model gets used) when the condition after assert holds. Here it verifies that the nc in the data file matches the nc in the model cfg; otherwise it raises an AssertionError with the formatted message.
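A tiny illustration of this assert pattern, with made-up values:

nc = 80        # classes declared in the data yaml
model_nc = 80  # classes declared in the model cfg
assert model_nc == nc, 'data nc=%g but cfg nc=%g' % (nc, model_nc)  # passes silently
# If the two disagreed, this line would raise AssertionError with that message
# and training would stop before the mismatched model is ever used.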
1.6 Check training and test image sizes
# Image sizes
gs = int(max(model.stride)) # grid size (max stride)
imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples
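check_img_size is defined elsewhere in the repo; a minimal sketch of the idea, assuming it simply rounds the requested size up to the nearest multiple of the maximum stride (the function body below is illustrative, not the actual implementation):

import math

def check_img_size_sketch(img_size, s=32):
    # Round up to the nearest multiple of the max stride s, warning on change
    new_size = math.ceil(img_size / s) * s
    if new_size != img_size:
        print('WARNING: img size %g must be a multiple of max stride %g, using %g' % (img_size, s, new_size))
    return new_size

print(check_img_size_sketch(641, 32))  # -> 672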
1.7 Set optimizer parameters
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in model.named_parameters():
    if v.requires_grad:
        if '.bias' in k:
            pg2.append(v)  # biases
        elif '.weight' in k and '.bn' not in k:
            pg1.append(v)  # apply weight decay
        else:
            pg0.append(v)  # all else
optimizer = optim.Adam(pg0, lr=hyp['lr0']) if opt.adam else \
    optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2
Optimizer groups: 102 .bias, 108 conv.weight, 99 other
del does not delete the data itself; it deletes the variable, i.e. the reference pointing to the data.
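A worked example of the scaling above, with illustrative numbers: gradients are accumulated so the effective batch size stays near the nominal nbs=64, and weight_decay is rescaled to match.

nbs, batch_size, weight_decay = 64, 16, 0.0005  # illustrative values
accumulate = max(round(nbs / batch_size), 1)    # 4 batches accumulated per optimizer step
scaled_wd = weight_decay * batch_size * accumulate / nbs
print(accumulate, scaled_wd)                    # 4 0.0005 (16 * 4 / 64 == 1.0, so unchanged)
# With batch_size = 24: accumulate = 3 and the scale is 24 * 3 / 64 = 1.125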
1.8 Load the pretrained model and weights, and write training results to results.txt
# Load Model
google_utils.attempt_download(weights)
start_epoch, best_fitness = 0, 0.0
if weights.endswith('.pt'):  # pytorch format
    ckpt = torch.load(weights, map_location=device)  # load checkpoint

    # load model
    try:
        ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
                         if model.state_dict()[k].shape == v.shape}  # to FP32, filter
        model.load_state_dict(ckpt['model'], strict=False)
    except KeyError as e:
        s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s." \
            % (opt.weights, opt.cfg, opt.weights)
        raise KeyError(s) from e

    # load optimizer
    if ckpt['optimizer'] is not None:
        optimizer.load_state_dict(ckpt['optimizer'])
        best_fitness = ckpt['best_fitness']

    # load results
    if ckpt.get('training_results') is not None:
        with open(results_file, 'w') as file:
            file.write(ckpt['training_results'])  # write results.txt

    start_epoch = ckpt['epoch'] + 1
    del ckpt
1.9 Apply mixed precision to training
If mixed_precision was set to False above (apex is not installed), mixed precision is not applied to training.
if mixed_precision:
    model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
Note that opt_level='O1' is the capital letter 'O' followed by the digit one, not 'zero one'.
1.10 Set up the cosine scheduler and define the learning-rate decay
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
scheduler.last_epoch = start_epoch - 1 # do not move
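To see what this schedule does, evaluate lf(x) directly (illustrative run with epochs = 100):

import math

epochs = 100
lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # same lambda as above
for e in (0, 25, 50, 75, 99):
    print(e, round(lf(e), 4))
# 0 1.0      -> training starts at the full lr0
# 25 0.8682
# 50 0.55    -> halfway point
# 75 0.2318
# 99 0.1002  -> lr decays smoothly to about 10% of lr0 by the last epoch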
1.11 Define and initialize distributed training
# Initialize distributed training
if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
    dist.init_process_group(backend='nccl',  # distributed backend
                            init_method='tcp://127.0.0.1:9999',  # init method
                            world_size=1,  # number of nodes
                            rank=0)  # node rank
    model = torch.nn.parallel.DistributedDataParallel(model)
Distributed training is enabled only when all three conditions above hold: the device is not the CPU, more than one CUDA device is available, and torch.distributed is available.
The author trains on a single GPU, so the condition is not met and distributed training is not used here.
torch.nn.parallel.DistributedDataParallel supports multi-process model parallelism on a single machine or across machines; each process has its own optimizer and performs its own update step.
1.12 Load the training and test sets
# Trainloader
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                        hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect)
mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class
assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Correct your labels or your model.' % (mlc, nc, opt.cfg)
# Testloader
testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
                               hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
The differences between dataloader and testloader, illustrated by the sketch after this list:
- testloader: no data augmentation, and rect=True (rectangular inference, which presumably keeps the original aspect ratio of the test images)
- dataloader: data augmentation enabled; rectangular training is optional via opt.rect
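A hypothetical sketch of what rect=True implies for batch shapes: scale the long side to the target size, keep the aspect ratio, and pad each dimension only up to the next stride multiple instead of padding to a full square (helper name and numbers below are made up for illustration):

import math

def rect_shape_sketch(h0, w0, img_size=640, gs=32):
    # Scale so the longer side equals img_size, then round each side
    # up to a multiple of the stride gs
    r = img_size / max(h0, w0)
    h, w = round(h0 * r), round(w0 * r)
    return math.ceil(h / gs) * gs, math.ceil(w / gs) * gs

print(rect_shape_sketch(720, 1280))  # (384, 640) instead of a square (640, 640)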
1.13 Model parameters
# Model parameters
hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
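labels_to_class_weights lives in the repo's utils; a minimal sketch of the general idea, assuming inverse-frequency weighting (an illustration of the concept, not the exact implementation):

import numpy as np

def class_weights_sketch(class_ids, nc):
    # Rare classes get a larger weight, frequent classes a smaller one
    counts = np.bincount(class_ids, minlength=nc).astype(float)
    counts[counts == 0] = 1            # avoid division by zero for absent classes
    weights = 1.0 / counts
    return weights / weights.sum()     # normalize to sum to 1

print(class_weights_sketch(np.array([0, 0, 0, 1]), nc=2))  # [0.25 0.75]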
1.14 Class statistics
# Class frequency
labels = np.concatenate(dataset.labels, 0)
c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1.
# model._initialize_biases(cf.to(device))
if tb_writer:
    plot_labels(labels)
    tb_writer.add_histogram('classes', c, 0)
1.15 Check the anchors
# Check anchors
if not opt.noautoanchor:
    check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
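check_anchors measures how well the current anchors fit the dataset's label shapes and recomputes them if the fit is poor. A rough sketch of the kind of ratio metric involved, assuming a width/height ratio test against the threshold hyp['anchor_t'] (illustrative, not the exact implementation):

import torch

def anchor_fit_sketch(wh, anchors, thr=4.0):
    # For each label wh, find its best anchor, where a match means both the
    # width and height ratios are within a factor of thr
    r = wh[:, None] / anchors[None]         # (labels, anchors, 2) ratios
    m = torch.min(r, 1 / r).min(2)[0]       # worst-side ratio per label/anchor
    best = m.max(1)[0]                      # best anchor per label
    return (best > 1 / thr).float().mean()  # fraction of labels with a usable anchor

wh = torch.tensor([[30., 60.], [100., 90.]])                  # made-up label sizes
anchors = torch.tensor([[10., 13.], [30., 61.], [62., 45.]])
print(anchor_fit_sketch(wh, anchors))                         # tensor(1.) -> all labels covered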
1.16 Exponential moving average
# Exponential moving average
ema = torch_utils.ModelEMA(model)
In deep learning, EMA (exponential moving average) is often applied to the model's parameters to average them over training, which tends to improve test metrics and make the model more robust.
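A minimal scalar sketch of the EMA update rule (ModelEMA applies the same idea to every parameter of the model, with a decay close to 1):

class EmaSketch:
    # The shadow value trails the raw value, smoothing out noisy updates
    def __init__(self, value, decay=0.9):
        self.shadow = value
        self.decay = decay

    def update(self, value):
        self.shadow = self.decay * self.shadow + (1 - self.decay) * value
        return self.shadow

ema = EmaSketch(0.0, decay=0.9)
for v in [1.0, 1.0, 1.0]:
    print(round(ema.update(v), 3))  # 0.1, 0.19, 0.271 -> creeps toward 1.0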
1.17 Start training
1.17.1 Get parameters
Record the start time, the number of batches, and the burn-in iteration count, and initialize the per-class mAPs and results; then print the train/test image sizes, the number of dataloader workers, and the number of epochs.
# Start training
t0 = time.time() # start time
nb = len(dataloader) # number of batches
n_burn = max(3 * nb, 1e3) # burn-in iterations, max(3 epochs, 1k iterations)
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
print('Using %g dataloader workers' % dataloader.num_workers)
print('Starting training for %g epochs...' % epochs)
# torch.autograd.set_detect_anomaly(True)
1.17.2 Training begins
Per epoch: load image weights (optional), define the progress bar, apply burn-in (warm-up), optionally use multi-scale training, run the forward pass, compute the loss, run the backward pass, step the optimizer, update the progress bar, log training values to TensorBoard, compute mAP, write results to results.txt, and save the model (best and last).
for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
    model.train()

    # Update image weights (optional)
    if dataset.image_weights:
        w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
        image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
        dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx

    # Update mosaic border
    # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
    # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

    mloss = torch.zeros(4, device=device)  # mean losses
    print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
    pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
    for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
        ni = i + nb * epoch  # number integrated batches (since train start)
        imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0

        # Burn-in
        if ni <= n_burn:
            xi = [0, n_burn]  # x interp
            # model.gr = np.interp(ni, xi, [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
            accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
            for j, x in enumerate(optimizer.param_groups):
                # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                if 'momentum' in x:
                    x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])

        # Multi-scale
        if opt.multi_scale:
            sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
            sf = sz / max(imgs.shape[2:])  # scale factor
            if sf != 1:
                ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

        # Forward
        pred = model(imgs)

        # Loss
        loss, loss_items = compute_loss(pred, targets.to(device), model)
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss_items)
            return results

        # Backward
        if mixed_precision:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        # Optimize
        if ni % accumulate == 0:
            optimizer.step()
            optimizer.zero_grad()
            ema.update(model)

        # Print
        mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
        mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
        s = ('%10s' * 2 + '%10.4g' * 6) % (
            '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
        pbar.set_description(s)

        # Plot
        if ni < 3:
            f = 'train_batch%g.jpg' % ni  # filename
            result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
            if tb_writer and result is not None:
                tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard

        # end batch ------------------------------------------------------------------------------------------------

    # Scheduler
    scheduler.step()

    # mAP
    ema.update_attr(model)
    final_epoch = epoch + 1 == epochs
    if not opt.notest or final_epoch:  # Calculate mAP
        results, maps, times = test.test(opt.data,
                                         batch_size=batch_size,
                                         imgsz=imgsz_test,
                                         save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'),
                                         model=ema.ema,
                                         single_cls=opt.single_cls,
                                         dataloader=testloader)

    # Write
    with open(results_file, 'a') as f:
        f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
    if len(opt.name) and opt.bucket:
        os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt.bucket, opt.name))

    # Tensorboard
    if tb_writer:
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1',
                'val/giou_loss', 'val/obj_loss', 'val/cls_loss']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            tb_writer.add_scalar(tag, x, epoch)

    # Update best mAP
    fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
    if fi > best_fitness:
        best_fitness = fi

    # Save model
    save = (not opt.nosave) or (final_epoch and not opt.evolve)
    if save:
        with open(results_file, 'r') as f:  # create checkpoint
            ckpt = {'epoch': epoch,
                    'best_fitness': best_fitness,
                    'training_results': f.read(),
                    'model': ema.ema.module if hasattr(model, 'module') else ema.ema,
                    'optimizer': None if final_epoch else optimizer.state_dict()}

        # Save last, best and delete
        torch.save(ckpt, last)
        if (best_fitness == fi) and not final_epoch:
            torch.save(ckpt, best)
        del ckpt

    # end epoch ----------------------------------------------------------------------------------------------------
# end training
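A worked example of the burn-in interpolation in the loop above, with illustrative numbers (the lf(epoch) factor is dropped for brevity):

import numpy as np

nb, nbs, batch_size, lr0 = 500, 64, 16, 0.01   # illustrative values
n_burn = max(3 * nb, 1e3)                      # 1500 iterations
ni = 750                                       # halfway through burn-in
xi = [0, n_burn]
accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
bias_lr = np.interp(ni, xi, [0.1, lr0])        # bias lr falls: 0.1 -> lr0
other_lr = np.interp(ni, xi, [0.0, lr0])       # other lrs rise: 0.0 -> lr0
print(accumulate, bias_lr, other_lr)           # 2.0 0.055 0.005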
Image sizes 608 train, 608 test (the configured training and test image sizes)
Using 8 dataloader workers (8 worker processes loading data; this is num_workers, not the batch size)
Starting training for 100 epochs... (100 epochs configured)
tqdm is a fast, extensible Python progress bar: wrap any iterable in tqdm(iterator) and it adds a progress display to long loops.
pbar = tqdm(enumerate(dataloader), total=nb)
creates the progress bar; total=nb is the expected number of iterations, i.e. the number of batches per epoch (not the number of epochs).
A line saved to results.txt looks like: 0/49 6.44G 0.09249 0.07952 0.05631 0.2283 6 608 0.1107 0.1954 0.1029 0.03088 0.07504 0.06971 0.03865
Matching it against the format strings above, the columns are: epoch, gpu_mem, the training losses (GIoU, obj, cls, total), targets, img_size, followed by P, R, mAP@0.5, F1 and the validation losses (GIoU, obj, cls).
1.18 Define model file names
n = opt.name
if len(n):
    n = '_' + n if not n.isnumeric() else n
    fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
    for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
        if os.path.exists(f1):
            os.rename(f1, f2)  # rename
            ispt = f2.endswith('.pt')  # is *.pt
            strip_optimizer(f2) if ispt else None  # strip optimizer
            os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload
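strip_optimizer comes from the repo's utils; a minimal sketch of what it plausibly does, assuming it drops the optimizer state and halves the weights so the checkpoint file shrinks (an assumption for illustration, not the exact implementation):

import torch

def strip_optimizer_sketch(f):
    # Load the checkpoint, discard optimizer state, store weights as FP16
    ckpt = torch.load(f, map_location='cpu')
    ckpt['optimizer'] = None
    ckpt['model'].half()
    torch.save(ckpt, f)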
1.19 Finish training and return results
if not opt.evolve:
    plot_results()  # save as results.png
print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
dist.destroy_process_group() if device.type != 'cpu' and torch.cuda.device_count() > 1 else None
torch.cuda.empty_cache()
return results
50 epochs completed in 11.954 hours.