CIFAR10练习
安装Anaconda中没有的包时,可以在Anaconda Cloud和PyPI上查找。本次主要是对CIFAR10进行分类(只是简单的分类,代码需要优化),实验代码如下:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from PIL import Image
import pandas as pd
import seaborn as sns
from torch.nn import Linear,Module,MSELoss
from torch.optim import SGD
from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler
import time ###时间
# Preprocessing: convert PIL images to tensors and normalize each RGB
# channel with mean/std 0.5, mapping pixel values to roughly [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 train/test splits; set download=True on the first run,
# then False once the data is on disk.
trainset = torchvision.datasets.CIFAR10(
    './data', train=True, download=False, transform=transform)
testset = torchvision.datasets.CIFAR10(
    './data', train=False, download=False, transform=transform)

# FIX: this was a set literal ({...}); sets are unordered and cannot be
# indexed by class id, so per-class reporting would be wrong/impossible.
# Use an ordered list (label id -> class name) instead.
classess = ['plane', 'car', 'bird', 'cat', 'deer',
            'dog', 'frog', 'horse', 'ship', 'truck']

# Hold out 20% of the training images as a validation split.
valid_size = 0.2
num_train = len(trainset)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, sampler=train_sampler, num_workers=2)
validloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, sampler=valid_sampler, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=16, shuffle=False, num_workers=2)

EPOCH = 20
# FIX: np.Inf was removed in NumPy 2.0; np.inf is the supported spelling.
valid_loss_min = np.inf
class myNet(nn.Module):
    """LeNet-style CNN for 32x32 RGB CIFAR-10 images (10 classes)."""

    def __init__(self):
        super(myNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)    # 3x32x32 -> 6x28x28
        self.pool = nn.MaxPool2d(2, 2)     # halves spatial dims
        self.conv2 = nn.Conv2d(6, 16, 5)   # 6x14x14 -> 16x10x10
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class logits of shape (N, 10)."""
        x = self.pool(F.relu(self.conv1(x)))  # -> 6x14x14
        x = self.pool(F.relu(self.conv2(x)))  # -> 16x5x5
        x = x.view(x.size(0), -1)             # flatten per sample
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # FIX: the original applied F.log_softmax here while training used
        # nn.CrossEntropyLoss, which already applies log-softmax internally,
        # so log-softmax was effectively applied twice. Return raw logits;
        # log_softmax is only correct when paired with nn.NLLLoss.
        return self.fc3(x)
if __name__ == '__main__':
    net = myNet()
    # CrossEntropyLoss expects raw logits (it applies log-softmax itself).
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    train_loss_list = []
    valid_loss_list = []

    for epoch in range(EPOCH):
        since = time.time()
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch + 1, EPOCH))
        train_loss = 0.0
        valid_loss = 0.0

        # ---- training phase ----
        net.train()
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # loss.item() is the batch mean; weight by batch size so the
            # epoch average below is exact even with a partial last batch.
            train_loss += loss.item() * inputs.size(0)

        # ---- validation phase ----
        # FIX: the original also called loss.backward() and optimizer.step()
        # here, i.e. it trained on the validation split, invalidating the
        # validation loss and leaking validation data into the model.
        # Validation must be a pure forward pass with gradients disabled.
        net.eval()
        with torch.no_grad():
            for i, data in enumerate(validloader, 0):
                inputs, labels = data
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * inputs.size(0)

        train_loss = train_loss / len(trainloader.sampler)
        valid_loss = valid_loss / len(validloader.sampler)
        time_elapsed = time.time() - since
        print('Train Loss:{:.4f} Valid Loss:{:.4f}'.format(train_loss, valid_loss))
        print('time: {:.0f} m {:.0f} s'.format(time_elapsed // 60, time_elapsed % 60))
        train_loss_list.append(train_loss)
        valid_loss_list.append(valid_loss)

        # Checkpoint whenever validation loss improves.
        if valid_loss <= valid_loss_min:
            print('Valid Loss decreased.-----Saving model ...')
            torch.save(net.state_dict(), 'test_model.pt')
            valid_loss_min = valid_loss

    print('finish training')
    plt.plot(range(1, len(train_loss_list) + 1), train_loss_list, 'bo', label='Train Loss')
    plt.plot(range(1, len(valid_loss_list) + 1), valid_loss_list, 'r', label='Valid Loss')
    plt.legend()
    plt.savefig('Loss.png')

    # ---- evaluation on the held-out test set ----
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, pre = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (pre == labels).sum().item()
    print('acc of net test image: %d %%' %
          (100 * correct / total))
只是一个基本测试,还可以继续训练。在增加epoch后,Loss居然上升了(需要调整结构),结果如下:
在增加dropout后,依然如此,结果如下:
训练几个epoch,Loss就增加,可以通过减小学习率LR来改善情况,结果如下(之后再继续训练):
将epoch修改为110,结果如下:
Acc of plane: 73% (736/1000)
Acc of car: 80% (804/1000)
Acc of bird: 59% (596/1000)
Acc of cat: 45% (457/1000)
Acc of deer: 63% (637/1000)
Acc of dog: 52% (520/1000)
Acc of frog: 79% (794/1000)
Acc of horse: 73% (730/1000)
Acc of ship: 78% (789/1000)
Acc of truck: 79% (799/1000)
Test Acc: 68% (6862/10000)
说明:之前的代码中的FC3用了log_softmax作为激活函数,对应的损失函数应该是nn.NLLLoss,之前的代码此处有问题。损失函数如果选用nn.CrossEntropyLoss(),则不用明确写出FC3的激活函数,因为nn.CrossEntropyLoss()内部会先做log_softmax再计算NLL损失。本次训练的完整代码如下:
# Preprocessing: tensor conversion plus per-channel normalization to ~[-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 splits; set download=True on the first run, False afterwards.
trainset = torchvision.datasets.CIFAR10(
    './data', train=True, download=False, transform=transform)
testset = torchvision.datasets.CIFAR10(
    './data', train=False, download=False, transform=transform)

# Class names indexed by label id.
classess = ['plane', 'car', 'bird', 'cat', 'deer',
            'dog', 'frog', 'horse', 'ship', 'truck']

# Carve 20% of the training set into a validation split.
valid_size = 0.2
num_train = len(trainset)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, sampler=train_sampler, num_workers=2)
validloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, sampler=valid_sampler, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=16, shuffle=False, num_workers=2)

EPOCH = 110  # previously tried 60 and 80
# FIX: np.Inf was removed in NumPy 2.0; np.inf is the supported spelling.
valid_loss_min = np.inf
class myNet(nn.Module):
    """LeNet-style CIFAR-10 classifier with dropout regularization.

    Two 5x5 conv layers with 2x2 max-pooling, then three fully-connected
    layers; emits raw logits intended for nn.CrossEntropyLoss.
    """

    def __init__(self):
        super(myNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.dropout = nn.Dropout(0.25)  # active only in train() mode

    def forward(self, batch):
        """Map a (N, 3, 32, 32) batch to (N, 10) class logits."""
        feats = self.pool(F.relu(self.conv1(batch)))
        feats = self.pool(F.relu(self.conv2(feats)))
        flat = feats.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(self.dropout(flat)))
        hidden = F.relu(self.fc2(self.dropout(hidden)))
        return self.fc3(hidden)
if __name__ == '__main__':
    net = myNet()
    # CrossEntropyLoss expects raw logits (it applies log-softmax itself).
    criterion = nn.CrossEntropyLoss()
    # lr lowered from 0.01; weight_decay adds mild L2 regularization.
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-6)
    train_loss_list = []
    valid_loss_list = []

    for epoch in range(EPOCH):
        since = time.time()
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch + 1, EPOCH))
        train_loss = 0.0
        valid_loss = 0.0

        # ---- training phase ----
        net.train()
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size for an exact epoch mean.
            train_loss += loss.item() * inputs.size(0)

        # ---- validation phase ----
        # FIX: the original also ran loss.backward() + optimizer.step() here,
        # i.e. it trained on the validation split. Validation must be a pure
        # forward pass with gradients disabled.
        net.eval()
        with torch.no_grad():
            for i, data in enumerate(validloader, 0):
                inputs, labels = data
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * inputs.size(0)

        train_loss = train_loss / len(trainloader.sampler)
        valid_loss = valid_loss / len(validloader.sampler)
        time_elapsed = time.time() - since
        print('Train Loss:{:.4f} Valid Loss:{:.4f}'.format(train_loss, valid_loss))
        print('time: {:.0f} m {:.0f} s'.format(time_elapsed // 60, time_elapsed % 60))
        train_loss_list.append(train_loss)
        valid_loss_list.append(valid_loss)

        # Checkpoint whenever validation loss improves.
        if valid_loss <= valid_loss_min:
            print('Valid Loss decreased.-----Saving model ...')
            torch.save(net.state_dict(), 'test_model_xiu110.pt')
            valid_loss_min = valid_loss

    print('finish training')
    plt.plot(range(1, len(train_loss_list) + 1), train_loss_list, 'bo', label='Train Loss')
    plt.plot(range(1, len(valid_loss_list) + 1), valid_loss_list, 'r', label='Valid Loss')
    plt.legend()
    plt.savefig('Loss_xiu110.png')
在去掉了weight_decay(通过添加weight_decay到梯度来实现L2正则化,参考weight_decay)后,感觉差别不大。
Acc of plane: 74% (746/1000)
Acc of car: 79% (790/1000)
Acc of bird: 54% (542/1000)
Acc of cat: 51% (512/1000)
Acc of deer: 63% (631/1000)
Acc of dog: 50% (504/1000)
Acc of frog: 74% (749/1000)
Acc of horse: 67% (678/1000)
Acc of ship: 73% (736/1000)
Acc of truck: 72% (729/1000)
Test Acc: 66% (6617/10000)
更改模型结构后(将核size=5换为size=3),acc明显提高了,结果如下:
Acc of plane: 80% (808/1000)
Acc of car: 89% (899/1000)
Acc of bird: 69% (693/1000)
Acc of cat: 57% (579/1000)
Acc of deer: 76% (764/1000)
Acc of dog: 67% (673/1000)
Acc of frog: 84% (844/1000)
Acc of horse: 77% (779/1000)
Acc of ship: 89% (891/1000)
Acc of truck: 83% (836/1000)
Test Acc: 77% (7766/10000)
说明:valid_loss<train_loss,可能是因为在训练时加入了dropout,通常它会在训练时激活丢失,在验证集上进行评估时(net.eval()模式下)会停用dropout。原因主要参考这个,另一个说明。
代码如下:
# Preprocessing: tensor conversion plus per-channel normalization to ~[-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 splits; set download=True on the first run, False afterwards.
trainset = torchvision.datasets.CIFAR10(
    './data', train=True, download=False, transform=transform)
testset = torchvision.datasets.CIFAR10(
    './data', train=False, download=False, transform=transform)

# Class names indexed by label id.
classess = ['plane', 'car', 'bird', 'cat', 'deer',
            'dog', 'frog', 'horse', 'ship', 'truck']

# Carve 20% of the training set into a validation split.
valid_size = 0.2
num_train = len(trainset)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, sampler=train_sampler, num_workers=2)
validloader = torch.utils.data.DataLoader(
    trainset, batch_size=16, sampler=valid_sampler, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=16, shuffle=False, num_workers=2)

EPOCH = 110  # previously tried 60 and 80
# FIX: np.Inf was removed in NumPy 2.0; np.inf is the supported spelling.
valid_loss_min = np.inf
class myNet(nn.Module):
    """Three-stage CNN for CIFAR-10.

    Each stage is a 3x3 same-padded convolution followed by ReLU and 2x2
    max-pooling, doubling the channel count while halving the spatial size.
    Two fully-connected layers with dropout produce raw logits for
    nn.CrossEntropyLoss.
    """

    def __init__(self):
        super(myNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)   # keeps 32x32
        self.pool = nn.MaxPool2d(2, 2)                # shared 2x2 pooling
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.fc1 = nn.Linear(64 * 4 * 4, 500)
        self.fc2 = nn.Linear(500, 10)
        self.dropout = nn.Dropout(0.25)  # disabled under net.eval()

    def forward(self, batch):
        """Map a (N, 3, 32, 32) batch to (N, 10) class logits."""
        out = self.pool(F.relu(self.conv1(batch)))  # -> 16x16x16
        out = self.pool(F.relu(self.conv2(out)))    # -> 32x8x8
        out = self.pool(F.relu(self.conv3(out)))    # -> 64x4x4
        out = self.dropout(out.view(-1, 64 * 4 * 4))
        out = self.dropout(F.relu(self.fc1(out)))
        return self.fc2(out)
if __name__ == '__main__':
    net = myNet()
    # CrossEntropyLoss expects raw logits (it applies log-softmax itself).
    criterion = nn.CrossEntropyLoss()
    # lr lowered from 0.01; weight_decay adds mild L2 regularization.
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-6)
    train_loss_list = []
    valid_loss_list = []

    for epoch in range(EPOCH):
        since = time.time()
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch + 1, EPOCH))
        train_loss = 0.0
        valid_loss = 0.0

        # ---- training phase ----
        net.train()
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size for an exact epoch mean.
            train_loss += loss.item() * inputs.size(0)

        # ---- validation phase ----
        # FIX: the original also ran loss.backward() + optimizer.step() here,
        # i.e. it trained on the validation split. Validation must be a pure
        # forward pass with gradients disabled. (This also explains the odd
        # train/valid curves the post asks about.)
        net.eval()
        with torch.no_grad():
            for i, data in enumerate(validloader, 0):
                inputs, labels = data
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * inputs.size(0)

        train_loss = train_loss / len(trainloader.sampler)
        valid_loss = valid_loss / len(validloader.sampler)
        time_elapsed = time.time() - since
        print('Train Loss:{:.4f} Valid Loss:{:.4f}'.format(train_loss, valid_loss))
        print('time: {:.0f} m {:.0f} s'.format(time_elapsed // 60, time_elapsed % 60))
        train_loss_list.append(train_loss)
        valid_loss_list.append(valid_loss)

        # Checkpoint whenever validation loss improves.
        if valid_loss <= valid_loss_min:
            print('Valid Loss decreased.-----Saving model ...')
            torch.save(net.state_dict(), 'test_model_3_110.pt')
            valid_loss_min = valid_loss

    print('finish training')
    plt.plot(range(1, len(train_loss_list) + 1), train_loss_list, 'bo', label='Train Loss')
    plt.plot(range(1, len(valid_loss_list) + 1), valid_loss_list, 'r', label='Valid Loss')
    plt.legend()
    plt.savefig('Loss_3_110.png')
为了验证是否是dropout的影响,去掉了dropout又进行了训练,结果如下:
Acc of plane: 81% (819/1000)
Acc of car: 84% (842/1000)
Acc of bird: 67% (672/1000)
Acc of cat: 56% (567/1000)
Acc of deer: 73% (733/1000)
Acc of dog: 66% (666/1000)
Acc of frog: 80% (804/1000)
Acc of horse: 79% (792/1000)
Acc of ship: 85% (858/1000)
Acc of truck: 82% (827/1000)
Test Acc: 75% (7580/10000)
待解决问题:为什么train_loss和valid_loss会是一条线?
MNIST练习
直接使用CIFAR10的网络结构,出现了错误。其实是最后全连接层的输入尺寸写错了:卷积/池化层的输出尺寸是向下取整,我把3.5写成了4,应该是3才对。激活函数F.log_softmax与F.nll_loss对应;最后的输出层也可以不写激活函数,在损失函数中直接用nn.CrossEntropyLoss。同样的网络在MNIST上训练几个epoch就有很好的acc了,应该是训练集大小的原因,训练结果如下:
Acc of 0: 99% (976/980)
Acc of 1: 99% (1124/1135)
Acc of 2: 99% (1025/1032)
Acc of 3: 99% (1009/1010)
Acc of 4: 99% (973/982)
Acc of 5: 99% (887/892)
Acc of 6: 99% (949/958)
Acc of 7: 98% (1013/1028)
Acc of 8: 99% (967/974)
Acc of 9: 98% (992/1009)
Test Acc: 99% (9915/10000)
ACC代码如下:
# Evaluate a trained checkpoint on the test set, reporting loss plus
# per-class and overall accuracy.
net = myNet()
criterion = nn.CrossEntropyLoss()
# Restore the weights saved during training.
net.load_state_dict(torch.load('MN_model.pt'))

correct = 0
total = 0
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

net.eval()
# FIX: evaluation needs no optimizer — the original constructed an SGD
# optimizer and called zero_grad() every batch to no effect. Wrapping the
# loop in torch.no_grad() also avoids building the autograd graph.
with torch.no_grad():
    for batch_idx, data in enumerate(testloader, 0):
        inputs, labels = data
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # Weight the batch-mean loss by batch size for an exact dataset mean.
        test_loss += loss.item() * inputs.size(0)
        _, pred = torch.max(outputs, 1)
        correct_tensor = pred.eq(labels.data.view_as(pred))
        correct = np.squeeze(correct_tensor.cpu().numpy())
        # FIX: the original iterated range(16) (the nominal batch size),
        # which fails or miscounts on a final partial batch; iterate the
        # actual number of samples in this batch instead.
        for i in range(labels.size(0)):
            label = labels.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

test_loss = test_loss / len(testloader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))
for i in range(10):
    if class_total[i] > 0:
        print('Acc of %5s: %2d%% (%2d/%2d)' % (
            classess[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i]))
        )
    else:
        print('%5s:N/A' % (classess[i]))
print('Test Acc: %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)
))
猫狗分类
损失函数相关参考:损失函数介绍,在载入数据时,各种报错:
Traceback (most recent call last):
File "c:/Users/Administrator/Desktop/testTorch/test.py", line 288, in <module>
for i,data in enumerate(train_data_gen,0):
File "D:\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 193, in __iter__
return _DataLoaderIter(self)
File "D:\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 469, in __init__
w.start()
File "D:\Anaconda3\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "D:\Anaconda3\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "D:\Anaconda3\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "D:\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 89, in __init__
reduction.dump(process_obj, to_child)
File "D:\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle module objects
PS C:\Users\Administrator\Desktop\testTorch> Traceback (most recent call last):
File "<string>", line 1, in <module>
File "D:\Anaconda3\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "D:\Anaconda3\lib\multiprocessing\spawn.py", line 115, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
后来发现是
train=ImageFolder('C:/Users/Administrator/Desktop/gcData/train/',transform)
中的transform错写成transforms了,以后别把变量和库里的函数名弄得相似!!!
还有就是关于self.fc2=nn.Linear(512,2)中,输出是1还是2的问题(pytorch的二分类例程好多都是输出2。例如这个,也有是1的,如这个)。之前在Keras中二分类的输出应该是dense(1),激活函数用sigmoid,损失函数用二进制交叉熵,按照这个思路,我在pytorch中将输出层写成self.fc2=nn.Linear(512,1),激活函数用torch.sigmoid,损失函数用nn.BCELoss(),但是会报错,将1改为2依然报错。之后我将输出层写成self.fc2=nn.Linear(512,2),损失函数用nn.CrossEntropyLoss()则可以正常运行。
本次训练看60个epoch,结果如下,曲线很奇怪。应该是学习率LR太小,收敛的太慢。再加上epoch太少,还没有完全收敛,所以曲线很奇怪,以上都只是猜想,需要进行验证。