本文将YOLOv11模型C2PSA模块中的注意力层替换为MLCA,组合成C2PSA_MLCA模块。
MLCA代码:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from .block import PSABlock, C2PSA
class MLCA(nn.Module):
    """MLCA attention module (presumably "Mixed Local Channel Attention").

    The input feature map is average-pooled to a ``local_size x local_size``
    grid (local descriptor) and further to ``1 x 1`` (global descriptor).
    Each descriptor is passed through its own single-channel ``Conv1d`` and a
    sigmoid, the two resulting attention maps are blended with
    ``local_weight``, resized to the input's spatial size, and multiplied
    element-wise with the input.

    Args:
        in_size: Number of input channels; only used to derive the Conv1d
            kernel size.
        local_size: Side length of the pooled local grid.
        gamma: Divisor in the ECA-style kernel-size formula.
        b: Offset in the ECA-style kernel-size formula.
        local_weight: Blend factor; the local map is weighted by
            ``local_weight`` and the global map by ``1 - local_weight``.
    """

    def __init__(self, in_size, local_size=5, gamma=2, b=1, local_weight=0.5):
        super().__init__()
        self.local_size = local_size
        self.gamma = gamma
        self.b = b
        self.local_weight = local_weight

        # ECA-style kernel size: derived from log2(channels), then forced odd
        # so that padding=(k - 1) // 2 preserves the sequence length.
        t = int(abs(math.log(in_size, 2) + self.b) / self.gamma)
        k = t if t % 2 else t + 1

        self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2, bias=False)
        self.conv_local = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2, bias=False)

        self.local_arv_pool = nn.AdaptiveAvgPool2d(local_size)
        self.global_arv_pool = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Apply the blended local/global attention map to ``x``.

        Args:
            x: 4-D tensor unpacked as ``(b, c, m, n)``.

        Returns:
            Tensor with the same shape as ``x``: ``x`` scaled element-wise by
            the attention map.
        """
        local_arv = self.local_arv_pool(x)
        # The global descriptor is pooled from the local grid, not from x.
        global_arv = self.global_arv_pool(local_arv)

        b, c, m, n = x.shape
        grid = self.local_size * self.local_size

        # (b, c, ls, ls) -> (b, c, ls*ls) -> (b, ls*ls, c) -> (b, 1, ls*ls*c)
        temp_local = local_arv.view(b, c, -1).transpose(-1, -2).reshape(b, 1, -1)
        # (b, c, 1, 1) -> (b, c, 1) -> (b, 1, c)
        temp_global = global_arv.view(b, c, -1).transpose(-1, -2)

        y_local = self.conv_local(temp_local)
        y_global = self.conv(temp_global)

        # Undo the flattening: (b, 1, ls*ls*c) -> (b, ls*ls, c) -> (b, c, ls*ls) -> (b, c, ls, ls)
        y_local_transpose = (
            y_local.reshape(b, grid, c)
            .transpose(-1, -2)
            .reshape(b, c, self.local_size, self.local_size)
        )
        # (b, 1, c) -> (b, c) -> (b, c, 1, 1)
        y_global_transpose = y_global.view(b, -1).unsqueeze(-1).unsqueeze(-1)

        # "Un-pooling": bring both maps to the local grid, blend them, then
        # resize the blended map to the input's spatial size.
        att_local = y_local_transpose.sigmoid()
        att_global = F.adaptive_avg_pool2d(
            y_global_transpose.sigmoid(), [self.local_size, self.local_size]
        )
        att_all = F.adaptive_avg_pool2d(
            att_global * (1 - self.local_weight) + att_local * self.local_weight,
            [m, n],
        )

        return x * att_all
class PSABlock_MLCA(PSABlock):
    """PSABlock subclass whose ``ffn`` attribute is replaced by an MLCA module.

    NOTE(review): the accompanying article describes replacing the attention
    layer, but this class overrides ``ffn`` rather than the attention
    sub-module — confirm which one is intended.
    """

    def __init__(self, c, qk_dim=16, pdim=32, shortcut=True) -> None:
        """Build the parent block for ``c`` channels, then swap in MLCA.

        Args:
            c: Channel count passed to ``PSABlock`` and ``MLCA``.
            qk_dim: Accepted for interface compatibility; not used here.
            pdim: Accepted for interface compatibility; not used here.
            shortcut: Forwarded to ``PSABlock`` (previously silently ignored).
                Assumes ``PSABlock.__init__`` accepts a ``shortcut`` keyword —
                verify against the installed Ultralytics version.
        """
        super().__init__(c, shortcut=shortcut)
        self.ffn = MLCA(c)
class C2PSA_MLCA(C2PSA):
    """C2PSA subclass whose block stack ``m`` is built from PSABlock_MLCA."""

    def __init__(self, c1, c2, n=1, e=0.5):
        """Build the parent C2PSA layers, then replace ``self.m``.

        Args:
            c1: Input channels.
            c2: Output channels; must equal ``c1``.
            n: Number of ``PSABlock_MLCA`` blocks in ``self.m``.
            e: Expansion ratio; the hidden width is ``int(c1 * e)``.
        """
        # Forward ``e`` so the layers created by the parent use the same
        # ratio as ``self.c`` below; previously the parent always used its
        # own default, which only matched when e was left at that default.
        super().__init__(c1, c2, e=e)
        assert c1 == c2
        self.c = int(c1 * e)
        # Replace the parent's block stack with n MLCA-based blocks.
        self.m = nn.Sequential(
            *(PSABlock_MLCA(self.c, qk_dim=16, pdim=32) for _ in range(n))
        )
if __name__ == '__main__':
    # Smoke test: run MLCA on a random tensor and print input/output shapes.
    # Use a distinct instance name so the MLCA class is not shadowed.
    mlca = MLCA(256)
    # Create an input tensor.
    batch_size = 1
    input_tensor = torch.randn(batch_size, 256, 64, 64)
    # Run the module and print the input and output shapes.
    output_tensor = mlca(input_tensor)
    print("Input shape:", input_tensor.shape)
    print("Output shape:", output_tensor.shape)
首先将上面的核心代码复制到E:\ultralytics-main\ultralytics\nn\modules路径下,如下图所示。
在tasks.py中导入MLCA相关模块
在tasks.py中的模型配置部分添加下面的代码
将下面的模型配置复制到yolo11.yaml文件中
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 2, C3k2, [256, False, 0.25]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 2, C3k2, [512, False, 0.25]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 2, C3k2, [512, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 2, C3k2, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
- [-1, 2, C2PSA_MLCA, [1024]] # 10
# YOLO11n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 2, C3k2, [512, False]] # 13
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 2, C3k2, [256, False]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 2, C3k2, [512, False]] # 19 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 2, C3k2, [1024, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)
创建一个train的python文件开始训练
import warnings
warnings.filterwarnings('ignore')
from ultralytics import YOLO
if __name__ == '__main__':
    # Build the YOLO model object from the given model configuration file.
    model = YOLO('ultralytics/cfg/models/11/yolo11.yaml')
    # Optionally load pretrained weights to speed up training:
    # model.load('yolov11s.pt')
    model.train(
        # Dataset config file; the YAML holds dataset paths and class info.
        data='ultralytics/cfg/datasets/NEU-DET.yaml',
        cache=False,  # do not cache the dataset
        imgsz=640,  # training image size
        # NOTE(review): the original comment said 200 epochs, but the value
        # is 1 — confirm the intended number of epochs.
        epochs=1,
        batch=16,  # images per training batch
        close_mosaic=10,  # disable Mosaic augmentation for the last 10 epochs
        workers=8,  # number of data-loading workers
        patience=50,  # early stopping after 50 epochs without improvement
        device='0',  # train on the first GPU
        optimizer='SGD',  # optimizer used for parameter updates
    )