一、查看当前学习率

# Print the learning rate stored for the first parameter group in the
# optimizer's state dict (`optimizer` must already exist at this point).
print(optimizer.state_dict()['param_groups'][0]['lr'])

二、学习率调整策略

# 1. StepLR: multiply the lr by `gamma` every `step_size` scheduler steps.
#    One "step" is one call to scheduler.step(); the PyTorch docs describe
#    these as epochs, so the unit depends on where you call it.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma=0.1, last_epoch=-1)
# 2. MultiStepLR: multiply the lr by `gamma` each time the step counter reaches
#    one of the `milestones`, e.g. milestones=[30,60,80].
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1, last_epoch=-1)
# 3. ExponentialLR: exponential decay, lr = base_lr * gamma ** epoch.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma, last_epoch=-1)
# 4. CosineAnnealingLR: the lr follows a cosine curve between the initial lr
#    (the maximum) and `eta_min`. The period is 2 * T_max: the lr decreases for
#    T_max steps and then rises again for the next T_max steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max, eta_min=0, last_epoch=-1)
# 5. ReduceLROnPlateau: adaptive schedule that lowers the lr when a monitored
#    metric stops improving, e.g. when the validation loss stops decreasing or
#    the validation accuracy stops increasing.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version before copying this call.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=False, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
# ReduceLROnPlateau parameters:
# - mode: 'min' reacts when the metric stops decreasing (e.g. a loss),
#         'max' reacts when the metric stops increasing (e.g. an accuracy).
# - factor: multiplicative decay, plays the role of gamma.
# - patience: number of steps without improvement that are tolerated before
#             the lr is reduced.
# - verbose: whether to print a message on each lr update.
# - threshold_mode: how "improvement" is measured, either 'rel' or 'abs'.
#       threshold_mode='rel':
#           mode='max': dynamic_threshold = best * (1 + threshold)
#           mode='min': dynamic_threshold = best * (1 - threshold)
#       threshold_mode='abs':
#           mode='max': dynamic_threshold = best + threshold
#           mode='min': dynamic_threshold = best - threshold
# - threshold: used together with threshold_mode.
# - cooldown: number of steps to wait after an lr reduction before the
#             scheduler resumes monitoring.
# - min_lr: lower bound of the lr; a float, or a list with one value per
#           parameter group.
# - eps: minimal lr change; if the new and old lr differ by less than eps the
#        update is skipped.
# 6. LambdaLR: user-defined schedule, optionally a different one per parameter
#    group: lr = base_lr * lr_lambda(self.last_epoch).
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=-1)
# Very useful for fine-tuning: different layers can get both different learning
# rates and different schedules.
# lr_lambda (function or list): a function that maps the step counter to a
# multiplicative factor; pass a list of such functions when there are several
# parameter groups.
# When using any scheduler above, keep calling `optimizer.step()` and call
# `scheduler.step()` AFTER it — the scheduler only updates the lr, it does not
# update the weights. ReduceLROnPlateau additionally needs the monitored
# metric, e.g. `scheduler.step(val_loss)`.

# 7. 利用optimizer.param_groups自定义调整策略
def adjust_lr(optimizer, epoch, decay_rate=0.85, decay_epoch=10):
    """Decay the learning rate of every parameter group in place.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts that
            each hold an ``'lr'`` entry.
        epoch: current epoch index.
        decay_rate: factor the current learning rate is multiplied by.
        decay_epoch: no decay is applied while ``epoch <= decay_epoch``.

    Returns:
        None. The parameter groups are modified in place.
    """
    if epoch <= decay_epoch:
        return
    for param_group in optimizer.param_groups:
        # The decay compounds: each qualifying call multiplies the lr again.
        param_group['lr'] *= decay_rate
def adjust_learning_rate_epoch_step(optimizer, epoch, num_epochs, base_lr, epoch_step):
    """Set every parameter group's lr to an exponentially decayed value.

    The learning rate is ``base_lr * 0.1 ** (epoch / int(num_epochs / epoch_step))``,
    i.e. it shrinks by a factor of ten every ``int(num_epochs / epoch_step)``
    epochs. The exponent uses true division, so the decay is continuous.
    NOTE(review): if a staircase schedule was intended, the exponent would need
    floor division — confirm with the author.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts.
        epoch: current epoch index.
        num_epochs: total number of training epochs.
        base_lr: learning rate at epoch 0.
        epoch_step: number of ten-fold decays spread over ``num_epochs``.

    Returns:
        The learning rate that was written to every parameter group.

    Raises:
        ZeroDivisionError: if ``int(num_epochs / epoch_step)`` is 0.
    """
    epochs_per_decay = int(num_epochs / epoch_step)
    lr = base_lr * 0.1 ** (epoch / epochs_per_decay)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

三、搭建简单神经网络

1. 搭建网络训练和测试回归问题
# Toy regression data: 100 points on [-1, 1] and a noisy parabola on top.
x = torch.linspace(-1, 1, 100).unsqueeze(1)  # add a feature axis: (100,) -> (100, 1)
y = x ** 2 + 0.2 * torch.rand(x.shape)
# 构建MLP网络
class Net(torch.nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        n_feature: size of each input sample.
        n_hidden: number of units in the hidden layer.
        n_output: size of each output sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer

    def forward(self, x):
        """Run the forward pass and return the raw (un-activated) output."""
        # Use the fully qualified functional API: the alias `F` is never
        # imported in this file, so `F.relu` raised NameError.
        x = torch.nn.functional.relu(self.hidden(x))
        x = self.predict(x)
        return x
# Build the regressor: 1 input feature, 10 hidden units, 1 output.
net = Net(1, 10, 1)
print(net)  # show the layer structure

# Training tools: mean-squared-error loss and plain SGD over all parameters.
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.2)

for t in range(200):
    optimizer.zero_grad()            # discard gradients from the previous step
    prediction = net(x)
    loss = loss_func(prediction, y)
    loss.backward()                  # back-propagate to compute fresh gradients
    optimizer.step()                 # apply the parameter update
2. 搭建网络训练和测试mnist分类
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets,transforms
import matplotlib.pyplot as plt

# Hyper-parameters: training epochs, mini-batch size, learning rate
# (values such as 0.1, 0.01, 0.001 are worth trying).
num_epochs, batch_size, learning_rate = 1, 100, 0.001

# MNIST is a torch.utils.data.Dataset subclass; its __getitem__ defines what a
# single sample looks like. A custom sampling scheme (e.g. a PK sampler) is
# also implemented on the dataset/sampler side.
train_dataset = datasets.MNIST(
    root='E:/MNIST/',                                        # where the data is stored
    train=True,                                              # training split
    download=True,                                           # fetch it when missing
    transform=transforms.Compose([transforms.ToTensor()]),   # per-sample preprocessing
)
test_dataset = datasets.MNIST(
    root='E:/MNIST/',
    train=False,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Wrap the datasets in loaders that cut them into batches; only the training
# loader shuffles.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# 定义网络
class MLP(nn.Module):
    """Multi-layer perceptron for 28x28 inputs: 784 -> 300 -> ReLU -> 10."""

    def __init__(self):
        # Initialise nn.Module so that the sub-layers are registered.
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(784, 300)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(300, 10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return 10 logits each."""
        x = x.reshape(-1, 28 * 28)
        x = self.layer1(x)
        x = self.relu(x)
        y = self.layer2(x)
        return y
# Instantiate the model together with its loss and optimizer.
mlp = MLP()
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=learning_rate)

# Training loop.
mlp.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()               # clear stale gradients
        outputs = mlp(images)
        loss = loss_func(outputs, labels)
        loss.backward()                     # compute gradients
        optimizer.step()
        if (i + 1) % 100 != 0:
            continue
        # Report the loss of every 100th mini-batch.
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))
# Evaluate on the test split.
mlp.eval()      # switch layers such as dropout / batch-norm to inference behaviour
correct = 0
total = 0
# Inference needs no autograd graph; disabling it saves memory and time without
# changing the predictions.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = mlp(images)
        _, predicted = torch.max(outputs, 1)   # (max values, indices); indices are the classes
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('测试准确率: {:.4f}'.format(100.0*correct/total))
3. 用ResNet-50迁移学习进行图像分类训练
# 建立自己的数据集,结构如下:
# |--animals-10
# |--|--train
# |--|--|--bear
# |--|--|--zebra
# |--|--valid
# |--|--|--bear
# |--|--|--zebra
# |--|--test
# |--|--|--bear
# |--|--|--zebra
import torch
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import time
import numpy as np
import matplotlib.pyplot as plt
import os

 # 数据增强
# Per-split preprocessing: the training split is randomly augmented, the
# validation split is only resized and centre-cropped. Both end with the same
# per-channel normalisation.
image_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(256, scale=(0.8, 1.0)),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    valid=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
)

# Locate the image folders and wrap them in datasets / loaders.
dataset = 'animals-10'
train_directory, valid_directory = (
    os.path.join(dataset, split) for split in ('train', 'valid')
)
batch_size = 32
num_classes = 10
data = {
    split: datasets.ImageFolder(root=folder, transform=image_transforms[split])
    for split, folder in (('train', train_directory), ('valid', valid_directory))
}
train_data_size, valid_data_size = len(data['train']), len(data['valid'])
train_data = DataLoader(data['train'], batch_size=batch_size, shuffle=True)
valid_data = DataLoader(data['valid'], batch_size=batch_size, shuffle=True)
print(train_data_size, valid_data_size)
# Transfer learning: start from a pretrained ResNet-50.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version.
resnet50 = models.resnet50(pretrained=True)
# Freeze the backbone so that only the new head is trained. (Alternatively the
# backbone can be fine-tuned with a smaller learning rate.)
for param in resnet50.parameters():
    param.requires_grad = False
# Replace the final fully connected layer by a small classification head whose
# output size follows `num_classes` instead of a hard-coded 10.
fc_inputs = resnet50.fc.in_features
resnet50.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, num_classes),
    nn.LogSoftmax(dim=1)
)
# Use the same device selection as train_and_valid() so that the script also
# runs on machines without CUDA.
resnet50 = resnet50.to(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
# The head ends in LogSoftmax, hence the negative log-likelihood loss.
loss_func = nn.NLLLoss()
optimizer = optim.Adam(resnet50.parameters())
# Training
def train_and_valid(model, loss_function, optimizer, epochs=25):
    """Train ``model`` and validate it after every epoch.

    Reads the module-level ``train_data`` / ``valid_data`` loaders, their sizes
    ``train_data_size`` / ``valid_data_size`` and the ``dataset`` name. After
    each epoch the whole model is saved to
    ``models/<dataset>_model_<epoch>.pt``.

    Args:
        model: network to train; the caller is responsible for placing it on
            the device selected here (CUDA when available, otherwise CPU).
        loss_function: callable ``(outputs, labels) -> loss tensor``.
        optimizer: optimizer bound to the model's parameters.
        epochs: number of passes over the training data.

    Returns:
        ``(model, history)`` where ``history`` holds one
        ``[train_loss, valid_loss, train_acc, valid_acc]`` entry per epoch;
        accuracies are fractions in [0, 1].
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # torch.save does not create missing directories.
    os.makedirs('models', exist_ok=True)
    history = []
    best_acc = 0.0
    best_epoch = 0
    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))
        model.train()
        train_loss = 0.0
        train_acc = 0.0
        valid_loss = 0.0
        valid_acc = 0.0
        for i, (inputs, labels) in enumerate(train_data):
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Gradients accumulate across backward() calls, so reset them first.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight per-batch means by the batch size so that the epoch
            # averages stay correct when the last batch is smaller.
            train_loss += loss.item() * inputs.size(0)
            ret, predictions = torch.max(outputs.data, 1)
            correct_counts = predictions.eq(labels.data.view_as(predictions))
            acc = torch.mean(correct_counts.type(torch.FloatTensor))
            train_acc += acc.item() * inputs.size(0)
        with torch.no_grad():
            model.eval()
            for j, (inputs, labels) in enumerate(valid_data):
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = loss_function(outputs, labels)
                valid_loss += loss.item() * inputs.size(0)
                ret, predictions = torch.max(outputs.data, 1)
                correct_counts = predictions.eq(labels.data.view_as(predictions))
                acc = torch.mean(correct_counts.type(torch.FloatTensor))
                valid_acc += acc.item() * inputs.size(0)
        avg_train_loss = train_loss/train_data_size
        avg_train_acc = train_acc/train_data_size
        avg_valid_loss = valid_loss/valid_data_size
        avg_valid_acc = valid_acc/valid_data_size
        history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])
        if best_acc < avg_valid_acc:
            best_acc = avg_valid_acc
            best_epoch = epoch + 1
        epoch_end = time.time()
        # The first loss slot reports the TRAINING loss (it used to repeat the
        # validation loss).
        print("Epoch: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation: Loss: {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch+1, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start))
        print("Best Accuracy for validation : {:.4f} at epoch {:03d}".format(best_acc, best_epoch))
        torch.save(model, 'models/'+dataset+'_model_'+str(epoch+1)+'.pt')
    return model, history
    
# Train for 30 epochs and persist the per-epoch metrics.
num_epochs = 30
trained_model, history = train_and_valid(resnet50, loss_func, optimizer, num_epochs)
torch.save(history, 'models/'+dataset+'_history.pt')

# Plot the curves; columns of `history` are
# [train loss, valid loss, train accuracy, valid accuracy].
history = np.array(history)


def _save_curve(columns, legend, ylabel, filename):
    """Plot the given history columns per epoch, save the figure and show it."""
    plt.plot(columns)
    plt.legend(legend)
    plt.xlabel('Epoch Number')
    plt.ylabel(ylabel)
    plt.ylim(0, 1)
    plt.savefig(filename)
    plt.show()


_save_curve(history[:, 0:2], ['Tr Loss', 'Val Loss'], 'Loss', dataset+'_loss_curve.png')
_save_curve(history[:, 2:4], ['Tr Accuracy', 'Val Accuracy'], 'Accuracy', dataset+'_accuracy_curve.png')
4. 需要自己继承重写Dataset的情况
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
# ignore warnings
import warnings
# Silence all warnings for this demo (this also hides deprecation notices).
warnings.filterwarnings("ignore")
plt.ion()  # interactive mode

# Read the annotation table with pandas: column 0 is the image file name, the
# remaining columns are the landmark coordinates of that image.
landmarks_frame = pd.read_csv(r'../data/faces/face_landmarks.csv')
n = 65
img_name = landmarks_frame.iloc[n, 0]
# `Series.as_matrix()` was removed in pandas 1.0; np.asarray works on every
# pandas version and matches how FaceLandmarksDataset converts its rows.
landmarks = np.asarray(landmarks_frame.iloc[n, 1:])
landmarks = landmarks.astype('float').reshape(-1, 2)  # one (x, y) pair per row
print('Image name: {}'.format(img_name))
print('Landmarks shape: {}'.format(landmarks.shape))
print('First 4 landmarks: {}'.format(landmarks[:4]))
"""
Image name: person-7.jpg
Landmarks shape: (68, 2)
First 4 landmarks: [[32. 65.]
 [33. 76.]
 [34. 86.]
 [34. 97.]]
"""
# Helper that draws an image together with its landmarks.
def show_landmarks(image, landmarks):
    """Draw ``image`` and overlay ``landmarks`` as small red dots.

    ``landmarks`` is indexed as ``[:, 0]`` for the x and ``[:, 1]`` for the y
    coordinates.
    """
    plt.imshow(image)
    xs, ys = landmarks[:, 0], landmarks[:, 1]
    plt.scatter(xs, ys, s=10, marker='.', c='r')
    # Brief pause so that the figure gets a chance to refresh.
    plt.pause(0.001)
    
# Display the image selected above with its landmarks.
plt.figure()
show_landmarks(
    image=io.imread(os.path.join('../data/faces', img_name)),
    landmarks=landmarks,
)
plt.show()
# 定义一个Dataset类, 继承重载torch.utils.Dataset类
class FaceLandmarksDataset(Dataset):
    """Dataset of face images with their landmark annotations.

    Every sample is a dict ``{'image': ..., 'landmarks': ...}`` where the
    landmarks are a float array with one (x, y) pair per row.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Load the annotation table.

        :param csv_file: path of the CSV file with the annotations
        :param root_dir: directory that contains the images
        :param transform: optional callable applied to every sample
        """
        self.transform = transform
        self.root_dir = root_dir
        self.landmarks_frame = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of annotated rows."""
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        """Return the (optionally transformed) sample at position ``idx``."""
        index = idx.tolist() if torch.is_tensor(idx) else idx
        frame = self.landmarks_frame
        # Column 0 holds the file name, the remaining columns the coordinates.
        image = io.imread(os.path.join(self.root_dir, frame.iloc[index, 0]))
        points = np.array([frame.iloc[index, 1:]]).astype('float').reshape(-1, 2)
        sample = {'image': image, 'landmarks': points}
        return self.transform(sample) if self.transform else sample
# Instantiate the dataset (no transform).
face_dataset = FaceLandmarksDataset(csv_file=r'../data/faces/face_landmarks.csv', root_dir='../data/faces/')
# Plot the first 4 samples side by side.
fig = plt.figure()
for i in range(len(face_dataset)):
    sample = face_dataset[i]
    print(i, sample['image'].shape, sample['landmarks'].shape)
    ax = plt.subplot(1, 4, i+1)  # 1 row x 4 columns, subplot index is 1-based
    plt.tight_layout()
    ax.set_title('Sample #{}'.format(i))
    ax.axis('off')
    show_landmarks(**sample)  # the sample keys match the parameter names
    if i == 3:
        # The fourth sample is drawn: show the figure and stop.
        plt.show()
        break
"""
0 (324, 215, 3) (68, 2)
1 (500, 333, 3) (68, 2)
2 (250, 258, 3) (68, 2)
3 (434, 290, 3) (68, 2)
"""
# 自定义图像的变换
class Rescale(object):
    """Resize the image of a sample and scale its landmarks accordingly.

    ``output_size`` is either an int (the smaller image edge is scaled to this
    value and the aspect ratio is kept) or a ``(height, width)`` tuple.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']
        h, w = image.shape[:2]
        target = self.output_size
        if not isinstance(target, int):
            # Explicit (height, width) pair.
            new_h, new_w = target
        elif h > w:
            # Portrait: the width is the smaller edge.
            new_h, new_w = target * h / w, target
        else:
            # Landscape or square: the height is the smaller edge.
            new_h, new_w = target, target * w / h
        new_h, new_w = int(new_h), int(new_w)
        img = transform.resize(image, (new_h, new_w))
        # Landmark columns are (x, y), so x is scaled by the width ratio and
        # y by the height ratio.
        landmarks = landmarks * [new_w / w, new_h / h]
        return {'image': img, 'landmarks': landmarks}
        
class RandomCrop(object):
    """Crop a random window out of the image of a sample.

    ``output_size`` is either an int (square crop) or a ``(height, width)``
    tuple. The landmarks are shifted into the coordinate system of the crop.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']
        h, w = image.shape[:2]
        new_h, new_w = self.output_size
        # randint's upper bound is exclusive: "+ 1" makes the largest valid
        # offset reachable and lets an image that already has the target size
        # pass through instead of raising ValueError.
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)
        image = image[top: top + new_h, left: left + new_w]
        # Landmarks outside the crop window end up negative or beyond its size.
        landmarks = landmarks - [left, top]
        return {'image': image, 'landmarks': landmarks}
        
class ToTensor(object):
    """Convert the numpy arrays of a sample into torch tensors."""

    def __call__(self, sample):
        # Move the last image axis to the front:
        # height x width x channel -> channel x height x width.
        chw_image = sample['image'].transpose(2, 0, 1)
        points = sample['landmarks']
        return {
            'image': torch.from_numpy(chw_image),
            'landmarks': torch.from_numpy(points),
        }
    
# Compose transforms and compare their effect on one sample.
scale = Rescale(256)
crop = RandomCrop(128)
composed = transforms.Compose([Rescale(256),  RandomCrop(224)])
fig = plt.figure()
sample = face_dataset[65]
print(type(sample))
# Apply each transform to the same untransformed sample and plot the results
# next to each other.
for i, tsfrm in enumerate([scale, crop, composed]):
    transformed_sample = tsfrm(sample)
    ax = plt.subplot(1, 3, i+1)
    plt.tight_layout()
    ax.set_title(type(tsfrm).__name__)  # the class name of the transform is the title
    show_landmarks(**transformed_sample)
plt.show()

# Iterate over the custom dataset, first directly and then through a DataLoader.
transfromed_dataset = FaceLandmarksDataset(
    '../data/faces/face_landmarks.csv',
    '../data/faces/',
    transforms.Compose([Rescale(256), RandomCrop(224), ToTensor()]),
)
# Print the tensor sizes of (at most) the first four samples.
for i in range(min(4, len(transfromed_dataset))):
    sample = transfromed_dataset[i]
    print(i, sample['image'].size(), sample['landmarks'].size())
# num_workers=0 loads in the main process; use it if worker processes fail with
# BrokenPipeError.
dataloader = DataLoader(transfromed_dataset, batch_size=4, shuffle=True, num_workers=0)
# Helper that draws one whole batch.
def show_landmarks_batch(sample_batched):
    """Draw all images of a batch in one grid and overlay their landmarks."""
    images_batch = sample_batched['image']
    landmarks_batch = sample_batched['landmarks']
    im_size = images_batch.size(2)
    # NOTE(review): presumably mirrors make_grid's default padding — confirm.
    grid_border_size = 2

    grid = utils.make_grid(images_batch)
    # Tensor layout (C, H, W) -> image layout (H, W, C) for imshow.
    plt.imshow(grid.numpy().transpose((1, 2, 0)))

    for i in range(len(images_batch)):
        # Shift the x coordinates into the i-th grid cell; y only needs the
        # top border.
        # NOTE(review): the x shift uses size(2) of the batch; this assumes
        # square images — confirm.
        xs = landmarks_batch[i, :, 0].numpy() + i*im_size + (i+1)*grid_border_size
        ys = landmarks_batch[i, :, 1].numpy() + grid_border_size
        plt.scatter(xs, ys, s=10, marker='.', c='r')
        plt.title('Batch from dataloader')
# Walk through the batches, print their tensor sizes, and visualise the fourth
# batch before stopping.
for i_batch, sample_batched in enumerate(dataloader):
    print(i_batch, sample_batched['image'].size(),
          sample_batched['landmarks'].size())
    if i_batch == 3:
        plt.figure()
        show_landmarks_batch(sample_batched)
        plt.axis('off')
        plt.ioff()  # leave interactive mode before the final show()
        plt.show()
        break
"""
0 torch.Size([3, 224, 224]) torch.Size([68, 2])
1 torch.Size([3, 224, 224]) torch.Size([68, 2])
2 torch.Size([3, 224, 224]) torch.Size([68, 2])
3 torch.Size([3, 224, 224]) torch.Size([68, 2])
0 torch.Size([4, 3, 224, 224]) torch.Size([4, 68, 2])
1 torch.Size([4, 3, 224, 224]) torch.Size([4, 68, 2])
2 torch.Size([4, 3, 224, 224]) torch.Size([4, 68, 2])
3 torch.Size([4, 3, 224, 224]) torch.Size([4, 68, 2])
"""