My undergraduate major is Electronic Information Engineering, and my graduation project happened to be a deep-learning topic. Since this was my first exposure to the field, I taught myself the relevant material. The project is roughly a reproduction of 《人工智能物联网中面向智能任务的语义通信方法》 (a task-oriented semantic communication method for the Artificial Intelligence of Things); I may post the semantic-compression part later when I find the time.
This post covers feature extraction on the CIFAR10 dataset using a ResNet18 network. The network is modified (details below), and the test accuracy reaches about 95%.
Straight to the code!
"""
Main training script
"""
import torch
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from readData import ReadData
from RESNET18 import ResNet18
# set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# load the data
batch_size = 32
train_loader, test_loader = ReadData(batch_size=batch_size,
                                     pic_path='./dataset/CIFAR10')
save_path = './model/cifar10/resnet18(CNN)_cifar10.pth'
# build the model and load previously saved weights
n_class = 10
model = ResNet18()
model = model.to(device)
model.load_state_dict(torch.load(save_path))  # resume from saved weights; skip this line on a first run
# cross-entropy loss
criterion = nn.CrossEntropyLoss().to(device)
# start training
n_epochs = 50
best_accuracy = 0.0
acc = 0.0
accuracy = []
lr = 0.001  # lr = 0.01 for the first 100 epochs, lr = 0.001 afterwards
for epoch in tqdm(range(1, n_epochs + 1)):
    train_loss = 0.0
    total_sample = 0
    right_sample = 0
    # the optimizer is rebuilt each epoch so lr can be changed by hand; see the scheduler sketch below
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    ###################
    # train the model #
    ###################
    model.train()  # enable batch normalization and dropout
    for batch_idx, datas in enumerate(train_loader):
        data, target = datas  # unpack the images and labels
        data = data.to(device)  # torch.Size([32, 3, 32, 32])
        target = target.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)  # CNN
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item() * data.size(0)
    # average training loss over the epoch
    train_loss = train_loss / len(train_loader.sampler)
    print('[epoch:%d] train loss: %.3f' % (epoch, train_loss))
    ######################
    # validate the model #
    ######################
    model.eval()  # disable batch normalization and dropout
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # convert output logits to predicted classes
            _, pred = torch.max(output, 1)
            # compare predictions to true labels
            correct_tensor = pred.eq(target.data.view_as(pred))
            total_sample += target.size(0)  # count the actual batch size; the last batch may be smaller than batch_size
            right_sample += correct_tensor.sum().item()
    acc = right_sample / total_sample
    print("Accuracy on the test data:", 100 * acc, "%")
    if acc >= best_accuracy:
        print('accuracy increased ({:.3f} --> {:.3f}). Saving model ...'.format(best_accuracy, acc))
        best_accuracy = acc
        torch.save(model.state_dict(), save_path)
print("best_accuracy: ", 100 * best_accuracy, "%")
"""
Data loading helper
"""
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from cutout import Cutout  # custom Cutout augmentation; a sketch of this module is given below
# set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size = 32  # number of images per batch
# build the training and test loaders
def ReadData(batch_size=16, num_workers=0, pic_path='dataset'):
    # To improve generalization and reduce overfitting, the training set is augmented
    # during training, e.g. random flipping, occlusion, and padding followed by cropping.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),  # zero-pad 4 pixels on each side, then randomly crop back to 32x32
        transforms.RandomHorizontalFlip(),  # flip the image horizontally with probability 0.5
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # per-channel (R, G, B) normalization mean and std
        Cutout(n_holes=1, length=16),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # convert the data to torch.FloatTensor and normalize it
    train_data = datasets.CIFAR10(pic_path, train=True,
                                  download=True, transform=transform_train)
    test_data = datasets.CIFAR10(pic_path, train=False,
                                 download=True, transform=transform_test)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_loader, test_loader
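The import from cutout import Cutout refers to a local cutout.py that the post does not include. A minimal sketch of such a module, following the standard Cutout augmentation of DeVries & Taylor (2017); the author's actual file may differ:
"""
cutout.py -- minimal Cutout augmentation (sketch, after DeVries & Taylor, 2017)
"""
import numpy as np
import torch

class Cutout(object):
    """Randomly mask out square patches from an image tensor of shape (C, H, W)."""
    def __init__(self, n_holes, length):
        self.n_holes = n_holes  # number of square patches to mask out
        self.length = length    # side length of each square patch, in pixels

    def __call__(self, img):
        h, w = img.size(1), img.size(2)
        mask = np.ones((h, w), np.float32)
        for _ in range(self.n_holes):
            # pick a random center; the patch is clipped at the image border
            y, x = np.random.randint(h), np.random.randint(w)
            y1, y2 = np.clip(y - self.length // 2, 0, h), np.clip(y + self.length // 2, 0, h)
            x1, x2 = np.clip(x - self.length // 2, 0, w), np.clip(x + self.length // 2, 0, w)
            mask[y1:y2, x1:x2] = 0.0
        mask = torch.from_numpy(mask).expand_as(img)
        return img * mask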
"""
ResNet18 network, with modifications
"""
import torch.nn as nn
import torch.nn.functional as F
class CommonBlock(nn.Module):  # a plain residual block: two 3x3 convolutions with an identity shortcut
    def __init__(self, in_channel, out_channel, stride):
        super(CommonBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        identity = x  # the shortcut is a direct connection; no downsampling or channel change is needed
        x = F.relu(self.bn1(self.conv1(x)), inplace=True)  # first convolution
        x = self.bn2(self.conv2(x))  # second convolution, without ReLU before the addition
        x += identity  # add the two branches
        return F.relu(x, inplace=True)  # ReLU on the sum is the block output

class SpecialBlock(nn.Module):  # a downsampling residual block: two convolutions plus a projection shortcut
    def __init__(self, in_channel, out_channel, stride):  # stride is a list here; the shortcut and the residual branch use different strides
        super(SpecialBlock, self).__init__()
        self.change_channel = nn.Sequential(  # 1x1 convolution that downsamples and raises the channel count
            nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride[0], padding=0, bias=False),
            nn.BatchNorm2d(out_channel)
        )
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride[0], padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride[1], padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        identity = self.change_channel(x)  # project the input so it matches the residual branch for the addition
        x = F.relu(self.bn1(self.conv1(x)), inplace=True)
        x = self.bn2(self.conv2(x))  # residual branch done
        x += identity
        return F.relu(x, inplace=True)
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.prepare = nn.Sequential(  # the stem; modified for CIFAR10 ==> [batch, 64, 32, 32]
            nn.Conv2d(3, 64, 3, 1, 1),  # modification: 3x3 kernel, stride 1, and no max-pooling layer
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        self.layer1 = nn.Sequential(  # layer1 is special: input and output channels are both 64, so two CommonBlocks
            CommonBlock(64, 64, 1),
            CommonBlock(64, 64, 1)
        )
        self.layer2 = nn.Sequential(  # layers 2-4 are alike: the channel count changes, so one SpecialBlock then one CommonBlock
            SpecialBlock(64, 128, [2, 1]),
            CommonBlock(128, 128, 1)
        )
        self.layer3 = nn.Sequential(
            SpecialBlock(128, 256, [2, 1]),
            CommonBlock(256, 256, 1)
        )
        self.layer4 = nn.Sequential(
            SpecialBlock(256, 512, [2, 1]),
            CommonBlock(512, 512, 1)
        )
        self.pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))  # after the convolutions, adaptive average pooling ==> [batch, 512, 1, 1]
        # classifier built from three fully connected layers
        self.fc = nn.Sequential(  # final classification head; adjust as needed
            nn.Dropout(p=0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(128, 10)  # CIFAR10 has 10 classes
        )

    def forward(self, x):
        x = self.prepare(x)  # stem
        x = self.layer1(x)  # four residual stages
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)  # torch.Size([32, 512, 4, 4])
        x = self.pool(x)  # pooling: torch.Size([32, 512, 1, 1])
        x = x.reshape(x.shape[0], -1)  # flatten before the fully connected layers
        x = self.fc(x)
        return x
The first convolutional layer of ResNet-18 is modified: the original 7×7, stride-2 convolution is replaced with a 3×3, stride-1 convolution (nn.Conv2d(3, 64, 3, 1, 1) above), and the max-pooling layer that follows it is removed. The standard ResNet-18 is designed for ImageNet, where input images are 224×224; CIFAR-10 images are only 32×32, so downsampling that early would discard a large amount of image information.
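For comparison, the two stems side by side (the ImageNet version mirrors the standard torchvision ResNet-18; the spatial sizes in the comments assume the respective input resolutions):
import torch.nn as nn

# standard ResNet-18 stem (ImageNet, 224x224 input): 224 -> 112 -> 56
imagenet_stem = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)
# modified stem used above (CIFAR-10, 32x32 input): 32 -> 32, no early downsampling
cifar_stem = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
)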
"""
Evaluation script
"""
import torch
from readData import ReadData
from RESNET18 import ResNet18
# set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
n_class = 10
batch_size = 32
save_path = './model/cifar10/resnet18(CNN)_cifar10.pth'  # path the training script saved the CNN weights to
train_loader, test_loader = ReadData(batch_size=batch_size,
                                     pic_path='D:/pythonProject/test1/dataset/CIFAR10')
# load the saved weights
model = ResNet18()
model.load_state_dict(torch.load(save_path))
model = model.to(device)
total_sample = 0
right_sample = 0
model.eval()  # put the model in evaluation mode
with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)  # CNN
        # convert output logits to predicted classes
        _, pred = torch.max(output, 1)
        # compare predictions to true labels
        correct_tensor = pred.eq(target.data.view_as(pred))
        total_sample += target.size(0)  # count the actual batch size; the last batch may be smaller than batch_size
        right_sample += correct_tensor.sum().item()
print("Accuracy on the test data:", 100 * right_sample / total_sample, "%")
The final accuracy is about 95%.
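Since the larger project uses the network for task-oriented feature extraction rather than classification alone, the 512-dimensional vector after the pooling layer is the natural semantic feature. A sketch of how it could be read out of the model defined above (extract_features is a hypothetical helper, not part of the original code):
import torch

def extract_features(model, data):
    # return the 512-d pooled feature for a batch, reusing the submodules of ResNet18 above
    model.eval()
    with torch.no_grad():
        x = model.prepare(data)
        x = model.layer1(x)
        x = model.layer2(x)
        x = model.layer3(x)
        x = model.layer4(x)
        x = model.pool(x)  # [batch, 512, 1, 1]
        return x.reshape(x.shape[0], -1)  # [batch, 512]

# example: features for one test batch
data, target = next(iter(test_loader))
features = extract_features(model, data.to(device))
print(features.shape)  # torch.Size([32, 512])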