1. Basic building blocks of convolutional neural networks
- Dilated convolution
By inserting holes (zero elements) into the convolution kernel, a dilated convolution enlarges the receptive field and therefore captures more context.
Receptive field: in a convolutional neural network, the region of the input layer that determines a single element of a given layer's output, i.e. the area of the input image that one element of a feature map corresponds to.
- Transposed convolution
Purpose: upsample a feature map back to its original spatial size.
A transposed convolution is the reverse of a convolution: the input of the convolution plays the role of the transposed convolution's output, and the convolution's output plays the role of its input.
- 2D convolution
Applying a 2D convolution to the word embeddings of a sentence is the key step when using a convolutional neural network to classify text; a shape sketch of these three operations follows the list.
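A minimal shape sketch of the three operations above. The tensor sizes, channel counts and kernel sizes are illustrative choices, not values taken from the examples later in these notes.
import torch
import torch.nn as nn
x=torch.randn(1,1,32,32)  # dummy single-channel 32x32 image
# Dilated convolution: dilation=2 spreads a 3x3 kernel over a 5x5 window,
# enlarging the receptive field without adding parameters.
dilated=nn.Conv2d(1,8,kernel_size=3,dilation=2)
print(dilated(x).shape)  # torch.Size([1, 8, 28, 28])
# Transposed convolution: maps the 28x28 feature map back to 32x32,
# i.e. the reverse direction of the convolution above.
up=nn.ConvTranspose2d(8,1,kernel_size=3,dilation=2)
print(up(dilated(x)).shape)  # torch.Size([1, 1, 32, 32])
# 2D convolution over word embeddings: a sentence of 50 tokens with
# 100-dimensional embeddings is treated as a 1x50x100 "image"; a kernel
# as wide as the embedding dimension slides over whole words.
emb=torch.randn(1,1,50,100)
text_conv=nn.Conv2d(1,16,kernel_size=(3,100))
print(text_conv(emb).shape)  # torch.Size([1, 16, 48, 1])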
2. Classic convolutional neural networks
- LeNet-5
Mainly used for handwritten digit recognition.
Input: 32x32 grayscale images
Structure: 2 convolutional layers + 2 pooling layers + 2 fully connected layers
Output: scores for the 10 digit classes; a sketch of this layout follows.
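A minimal LeNet-5-style sketch of that layout, assuming the commonly quoted widths (6 and 16 feature maps, 120 hidden units); it illustrates the structure rather than reproducing the original network exactly.
import torch
import torch.nn as nn
lenet=nn.Sequential(
    nn.Conv2d(1,6,kernel_size=5),nn.Tanh(),nn.AvgPool2d(2,2),   # 32x32 -> 28x28 -> 14x14
    nn.Conv2d(6,16,kernel_size=5),nn.Tanh(),nn.AvgPool2d(2,2),  # 14x14 -> 10x10 -> 5x5
    nn.Flatten(),
    nn.Linear(16*5*5,120),nn.Tanh(),
    nn.Linear(120,10)                                           # one score per digit class
)
print(lenet(torch.randn(1,1,32,32)).shape)  # torch.Size([1, 10])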
- AlexNet
import torchvision.models as models
alexnet=models.alexnet()
- GoogLeNet
import torchvision.models as models
googlenet=models.googlenet()
- VGG
In the VGG network, several convolutional layers with small 3x3 kernels are stacked to replace a single layer with a larger kernel.
Small kernels reduce the number of parameters while adding extra non-linear feature mappings, which further increases the network's fitting capacity; the sketch after the code below makes the parameter saving concrete.
import torchvision.models as models
vgg16=models.vgg16()
vgg19=models.vgg19()
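A quick comparison of one 5x5 convolution against two stacked 3x3 convolutions, which cover the same 5x5 receptive field; the n_params helper and the channel width of 64 are just for this illustration.
import torch.nn as nn
def n_params(m): return sum(p.numel() for p in m.parameters())
big=nn.Conv2d(64,64,kernel_size=5,padding=2)  # one 5x5 layer
small=nn.Sequential(  # two 3x3 layers with the same receptive field
    nn.Conv2d(64,64,kernel_size=3,padding=1),nn.ReLU(),
    nn.Conv2d(64,64,kernel_size=3,padding=1)
)
print(n_params(big),n_params(small))  # 102464 vs 73856, plus an extra ReLU in between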
- TextCNN
In this architecture, a single convolutional layer is applied to the word embeddings of a sentence to extract textual features (the CNN_Text class in Section 5 implements this idea).
3. Recognizing Fashion-MNIST with a convolutional neural network
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import time
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.utils.data as Data
from torchvision import transforms
from torchvision.datasets import FashionMNIST
train_data=FashionMNIST(root="./data/FashionMNIST",
train=True,
transform= transforms.ToTensor(),
download=False)
train_loader=Data.DataLoader(dataset=train_data,
batch_size=64,
shuffle=False,
num_workers=2)
print("batch num of train_loader is : ",len(train_loader))
for step,(b_x,b_y) in enumerate(train_loader):
if step>0:
break
batch_x=b_x.squeeze().numpy()
batch_y=b_y.numpy()
class_label=train_data.classes
class_label[0]="T-shirt"
plt.figure(figsize=(12,5))
for ii in np.arange(len(batch_y)):
plt.subplot(4,16,ii+1)
plt.imshow(batch_x[ii,:,:],cmap=plt.cm.gray)
plt.title(class_label[batch_y[ii]],size=9)
plt.axis("off")
plt.subplots_adjust(wspace=0.05)
test_data=FashionMNIST(root="./data/FashionMNIST",
train=False,
download=False)
test_data_x=test_data.data.type(torch.FloatTensor)/255.0
test_data_x=torch.unsqueeze(test_data_x,dim=1)
test_data_y=test_data.targets
print("test_data_x.shape:",test_data_x.shape)
print("test_data_y.shape:",test_data_y.shape)
class MyConvNet(nn.Module):
def __init__(self):
super(MyConvNet,self).__init__()
self.conv1=nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=3,
stride=1,
padding=1,
),
nn.ReLU(),
nn.AvgPool2d(
kernel_size=2,
stride=2,
)
)
self.conv2=nn.Sequential(
nn.Conv2d(16,32,3,1,0),
nn.ReLU(),
nn.AvgPool2d(2,2)
)
self.classifier=nn.Sequential(
nn.Linear(32*6*6,256),
nn.ReLU(),
nn.Linear(256,128),
nn.ReLU(),
nn.Linear(128,10)
)
def forward(self,x):
x=self.conv1(x)
x=self.conv2(x)
x=x.view(x.size(0),-1)
output=self.classifier(x)
return output
myconvnet=MyConvNet()
print(myconvnet)
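The 32*6*6 input size of the first Linear layer follows from the feature-map sizes: 28x28 -> conv1 (padding 1) 28x28 -> pooling 14x14 -> conv2 (no padding) 12x12 -> pooling 6x6. A quick check with a dummy Fashion-MNIST-sized input (the dummy tensor is only for illustration):
dummy=torch.randn(1,1,28,28)  # one fake 28x28 grayscale image
feat=myconvnet.conv2(myconvnet.conv1(dummy))  # run only the two convolution blocks
print(feat.shape)  # torch.Size([1, 32, 6, 6]) -> flattened to 32*6*6 for the classifier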
def train_model(model,traindataloader,train_rate,criterion,optimizer,num_epochs=25):
batch_num=len(traindataloader)
train_batch_num=round(batch_num*train_rate)
best_model_wts=copy.deepcopy(model.state_dict())
best_acc=0.0
train_loss_all=[]
train_acc_all=[]
val_loss_all=[]
val_acc_all=[]
since=time.time()
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch,num_epochs-1))
print('-'*10)
train_loss=0.0
train_corrects=0
train_num=0
val_loss=0.0
val_corrects=0
val_num=0
for step,(b_x,b_y) in enumerate(traindataloader):
if step<train_batch_num:
model.train()
output=model(b_x)
pre_lab=torch.argmax(output,1)
loss=criterion(output,b_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss +=loss.item()*b_x.size(0)
train_corrects+=torch.sum(pre_lab==b_y.data)
train_num+=b_x.size(0)
else:
model.eval()
output=model(b_x)
pre_lab=torch.argmax(output,1)
loss=criterion(output,b_y)
val_loss+=loss.item()*b_x.size(0)
val_corrects+=torch.sum(pre_lab==b_y.data)
val_num+=b_x.size(0)
train_loss_all.append(train_loss/train_num)
train_acc_all.append(train_corrects.double().item()/train_num)
val_loss_all.append(val_loss/val_num)
val_acc_all.append(val_corrects.double().item()/val_num)
print('{} Train Loss :{:.4f} Train Acc:{:.4f}'.format(epoch,train_loss_all[-1],train_acc_all[-1]))
print('{} Val Loss :{:.4f} Val Acc:{:.4f}'.format(epoch,val_loss_all[-1],val_acc_all[-1]))
if val_acc_all[-1]>best_acc:
best_acc=val_acc_all[-1]
best_model_wts=copy.deepcopy(model.state_dict())
time_use=time.time()-since
print("Train and val complete in {:.0f}m {:.of}s".format(time_use//60,time_use%60))
model.load_stat_dict(best_model_wts)
train_process=pd.DataFrame(
data={"epoch":range(num_epochs),
"train_loss_all":train_loss_all,
"val_loss_all":val_loss_all,
"train_acc_all":train_acc_all,
"val_acc_all":val_acc_all}
)
return model,train_process
optimizer=torch.optim.Adam(myconvnet.parameters(),lr=0.0003)
criterion=nn.CrossEntropyLoss()
myconvnet,train_process=train_model(
myconvnet,train_loader,0.8,
criterion,optimizer,num_epochs=25
)
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(train_process.epoch,train_process.train_loss_all,"ro-",label="Train loss")
plt.plot(train_process.epoch,train_process.val_loss_all,"bs-",label="Val loss")
plt.legend()
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.subplot(1,2,2)
plt.plot(train_process.epoch,train_process.train_acc_all,"ro-",label="Train acc")
plt.plot(train_process.epoch,train_process.val_acc_all,"bs-",label="Val acc")
plt.xlabel("epoch")
plt.ylabel("acc")
plt.legend()
plt.show()
myconvnet.eval()
output=myconvnet(test_data_x)
pre_lab=torch.argmax(output,1)
acc=accuracy_score(test_data_y,pre_lab)
print("the accuracy of test data is ",acc)
conf_mat=confusion_matrix(test_data_y,pre_lab)
df_cm=pd.DataFrame(conf_mat,index=class_label,columns=class_label)
heatmap=sns.heatmap(df_cm,annot=True,fmt="d",cmap="YlGnBu")
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),rotation=0,ha='right')
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),rotation=45,ha='right')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import time
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.utils.data as Data
from torchvision import transforms
from torchvision.datasets import FashionMNIST
train_data=FashionMNIST(root="./data/FashionMNIST",
train=True,
transform= transforms.ToTensor(),
download=False)
train_loader=Data.DataLoader(dataset=train_data,
batch_size=64,
shuffle=False,
num_workers=2)
print("batch num of train_loader is : ",len(train_loader))
for step,(b_x,b_y) in enumerate(train_loader):
if step>0:
break
batch_x=b_x.squeeze().numpy()
batch_y=b_y.numpy()
class_label=train_data.classes
class_label[0]="T-shirt"
plt.figure(figsize=(12,5))
for ii in np.arange(len(batch_y)):
plt.subplot(4,16,ii+1)
plt.imshow(batch_x[ii,:,:],cmap=plt.cm.gray)
plt.title(class_label[batch_y[ii]],size=9)
plt.axis("off")
plt.subplots_adjust(wspace=0.05)
test_data=FashionMNIST(root="./data/FashionMNIST",
train=False,
download=False)
test_data_x=test_data.data.type(torch.FloatTensor)/255.0
test_data_x=torch.unsqueeze(test_data_x,dim=1)
test_data_y=test_data.targets
print("test_data_x.shape:",test_data_x.shape)
print("test_data_y.shape:",test_data_y.shape)
class MyConvdilaNet(nn.Module):
def __init__(self):
super(MyConvdilaNet,self).__init__()
self.conv1=nn.Sequential(nn.Conv2d(1,16,3,1,1,dilation=2),
nn.ReLU(),
nn.AvgPool2d(2,2),
)
self.conv2=nn.Sequential(nn.Conv2d(16,32,3,1,0,dilation=2),
nn.ReLU(),
nn.AvgPool2d(2,2),
)
self.classifier=nn.Sequential(nn.Linear(32*4*4,256),
nn.ReLU(),
nn.Linear(256,128),
nn.ReLU(),
nn.Linear(128,10)
)
def forward(self,x):
x=self.conv1(x)
x=self.conv2(x)
x=x.view(x.size(0),-1)
output=self.classifier(x)
return output
myconvidilanet=MyConvdilaNet()
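With dilation=2 a 3x3 kernel covers a 5x5 window, so the feature maps shrink faster than in MyConvNet: 28x28 -> conv1 26x26 -> pooling 13x13 -> conv2 9x9 -> pooling 4x4, which is why the classifier starts from 32*4*4. A quick check with an illustrative dummy input:
dummy=torch.randn(1,1,28,28)  # one fake 28x28 grayscale image
feat=myconvidilanet.conv2(myconvidilanet.conv1(dummy))  # run only the two dilated convolution blocks
print(feat.shape)  # torch.Size([1, 32, 4, 4]) -> flattened to 32*4*4 for the classifier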
def train_model(model,traindataloader,train_rate,criterion,optimizer,num_epochs=25):
batch_num=len(traindataloader)
train_batch_num=round(batch_num*train_rate)
best_model_wts=copy.deepcopy(model.state_dict())
best_acc=0.0
train_loss_all=[]
train_acc_all=[]
val_loss_all=[]
val_acc_all=[]
since=time.time()
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch,num_epochs-1))
print('-'*10)
train_loss=0.0
train_corrects=0
train_num=0
val_loss=0.0
val_corrects=0
val_num=0
for step,(b_x,b_y) in enumerate(traindataloader):
if step<train_batch_num:
model.train()
output=model(b_x)
pre_lab=torch.argmax(output,1)
loss=criterion(output,b_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss +=loss.item()*b_x.size(0)
train_corrects+=torch.sum(pre_lab==b_y.data)
train_num+=b_x.size(0)
else:
model.eval()
output=model(b_x)
pre_lab=torch.argmax(output,1)
loss=criterion(output,b_y)
val_loss+=loss.item()*b_x.size(0)
val_corrects+=torch.sum(pre_lab==b_y.data)
val_num+=b_x.size(0)
train_loss_all.append(train_loss/train_num)
train_acc_all.append(train_corrects.double().item()/train_num)
val_loss_all.append(val_loss/val_num)
val_acc_all.append(val_corrects.double().item()/val_num)
print('{} Train Loss :{:.4f} Train Acc:{:.4f}'.format(epoch,train_loss_all[-1],train_acc_all[-1]))
print('{} Val Loss :{:.4f} Val Acc:{:.4f}'.format(epoch,val_loss_all[-1],val_acc_all[-1]))
if val_acc_all[-1]>best_acc:
best_acc=val_acc_all[-1]
best_model_wts=copy.deepcopy(model.state_dict())
time_use=time.time()-since
print("Train and val complete in {:.0f}m {:.of}s".format(time_use//60,time_use%60))
model.load_stat_dict(best_model_wts)
train_process=pd.DataFrame(
data={"epoch":range(num_epochs),
"train_loss_all":train_loss_all,
"val_loss_all":val_loss_all,
"train_acc_all":train_acc_all,
"val_acc_all":val_acc_all}
)
return model,train_process
optimizer=torch.optim.Adam(myconvidilanet.parameters(),lr=0.0003)
criterion=nn.CrossEntropyLoss()
myconvidilanet,train_process=train_model(
myconvidilanet,train_loader,0.8,
criterion,optimizer,num_epochs=25
)
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(train_process.epoch,train_process.train_loss_all,"ro-",label="Train loss")
plt.plot(train_process.epoch,train_process.val_loss_all,"bs-",label="Val loss")
plt.legend()
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.subplot(1,2,2)
plt.plot(train_process.epoch,train_process.train_acc_all,"ro-",label="Train acc")
plt.plot(train_process.epoch,train_process.val_acc_all,"bs-",label="Val acc")
plt.xlabel("epoch")
plt.ylabel("acc")
plt.legend()
plt.show()
myconvidilanet.eval()
output=myconvidilanet(test_data_x)
pre_lab=torch.argmax(output,1)
acc=accuracy_score(test_data_y,pre_lab)
print("the accuracy of test data is ",acc)
conf_mat=confusion_matrix(test_data_y,pre_lab)
df_cm=pd.DataFrame(conf_mat,index=class_label,columns=class_label)
heatmap=sns.heatmap(df_cm,annot=True,fmt="d",cmap="YlGnBu")
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),rotation=0,ha='right')
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),rotation=45,ha='right')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
4. Fine-tuning a pretrained convolutional network
Starting from a pretrained VGG16 network, we fine-tune its architecture.
The dataset is the 10 Monkey Species dataset from Kaggle, available at https://www.kaggle.com/slothkong/10-monkey-species
The pretrained weights are used to extract features from the data, and new fully connected layers are then defined on top of them to classify the images.
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import hiddenlayer as hl
import torch
import torch.nn as nn
from torch.optim import Adam,SGD
import torch.utils.data as Data
from torchvision import transforms
from torchvision import models
from torchvision.datasets import ImageFolder
vgg16=models.vgg16(pretrained=True)
vgg=vgg16.features
for param in vgg.parameters():
param.requires_grad_(False)
class MyVggModel(nn.Module):
def __init__(self):
super(MyVggModel,self).__init__()
self.vgg=vgg
self.classifier=nn.Sequential(
nn.Linear(25088,512),
nn.ReLU(),
nn.Dropout(p=0.5),
nn.Linear(512,256),
nn.ReLU(),
nn.Dropout(p=0.5),
nn.Linear(256,10),
nn.Softmax(dim=1)
)
def forward(self,x):
x=self.vgg(x)
x=x.view(x.size(0),-1)
output=self.classifier(x)
return output
Myvggc=MyVggModel()
print(Myvggc)
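The 25088 input features of the first Linear layer correspond to the 512x7x7 output that the frozen vgg16.features block produces for a 224x224 image. A quick check (the dummy 224x224 input is only for illustration):
out=vgg(torch.randn(1,3,224,224))  # output of the frozen VGG16 feature extractor
print(out.shape,out.numel())  # torch.Size([1, 512, 7, 7]) 25088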
train_data_transforms=transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
val_data_transforms=transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
train_data_dir="data/chap6/10-monkey-species/training"
train_data=ImageFolder(train_data_dir,transform=train_data_transforms)
train_data_loader=Data.DataLoader(train_data,batch_size=32,shuffle=True,num_workers=2)
val_data_dir="data/chap6/10-monkey-species/validation"
val_data=ImageFolder(val_data_dir,transform=val_data_transforms)
val_data_loader=Data.DataLoader(val_data,batch_size=32,shuffle=True,num_workers=2)
print("train sample data num:",len(train_data.targets))
print("test sample data num : ",len(val_data.targets))
for step,(b_x,b_y) in enumerate(train_data_loader):
if step>0:
break
mean=np.array([0.485,0.456,0.406])
std=np.array([0.229,0.224,0.225])
plt.figure(figsize=(12,6))
for ii in np.arange(len(b_y)):
plt.subplot(4,8,ii+1)
image=b_x[ii,:,:,:].numpy().transpose((1,2,0))
image=std*image+mean
image=np.clip(image,0,1)
plt.imshow(image)
plt.title(b_y[ii].data.numpy())
plt.axis("off")
plt.subplots_adjust(hspace=0.3)
optimizer=torch.optim.Adam(Myvggc.parameters(),lr=0.003)
loss_func=nn.CrossEntropyLoss()
history1=hl.History()
canvas1=hl.Canvas()
for epoch in range(10):
train_loss_epoch=0
val_loss_epoch=0
train_corrects=0
val_corrects=0
Myvggc.train()
for step,(b_x,b_y) in enumerate(train_data_loader):
output=Myvggc(b_x)
loss=loss_func(output,b_y)
pre_lab=torch.argmax(output,1)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss_epoch+=loss.item()*b_x.size(0)
train_corrects+=torch.sum(pre_lab==b_y.data)
train_loss=train_loss_epoch/len(train_data.targets)
train_acc=train_corrects.double()/len(train_data.targets)
Myvggc.eval()
for step,(val_x,val_y) in enumerate(val_data_loader):
output=Myvggc(val_x)
loss=loss_func(output,val_y)
pre_lab=torch.argmax(output,1)
val_loss_epoch+=loss.item()*val_x.size(0)
val_corrects+=torch.sum(pre_lab==val_y.data)
val_loss=val_loss_epoch/len(val_data.targets)
val_acc=val_corrects.double()/len(val_data.targets)
history1.log(epoch,train_loss=train_loss,
val_loss=val_loss,
train_acc=train_acc.item(),
val_acc=val_acc.item())
with canvas1:
canvas1.draw_plot([history1["train_loss"],history1["val_loss"]])
canvas1.draw_plot([history1["train_acc"],history1["val_acc"]])
5. Sentiment classification with a convolutional neural network
The movie reviews come from https://www.kaggle.com/iarunava/imdb-movie-reviews-dataset, the IMDB movie review dataset.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import seaborn as sns
from wordcloud import WordCloud
import time
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import data
from torchvision import transforms
from torchtext.vocab import Vectors,GloVe
def load_text_data(path):
text_data=[]
label=[]
for dset in ["pos","neg"]:
path_dset=os.path.join(path,dset)
path_list=os.listdir(path_dset)
for fname in path_list:
if fname.endswith(".txt"):
filename=os.path.join(path_dset,fname)
with open(filename) as f:
text_data.append(f.read())
if dset=="pos":
label.append(1)
else:
label.append(0)
return np.array(text_data),np.array(label)
train_path="data/chap6/imdb/train"
train_text,train_label=load_text_data(train_path)
test_path="data/chap6/imdb/test"
test_text,test_label=load_text_data(test_path)
print(len(train_text),len(train_label))
print(len(test_text),len(test_label))
def text_preprocess(text_data):
text_pre=[]
for text1 in text_data:
text1=re.sub("<br /><br />"," ",text1)
text1=text1.lower()
text1=re.sub("\d+","",text1)
text1=text1.translate(str.maketrans("","",string.punctuation.replace("'","")))
text1=text1.strip()
text_pre.append(text1)
return np.array(text_pre)
train_text_pre=text_preprocess(train_text)
test_text_pre=text_preprocess(test_text)
def stop_stem_word(datalist,stop_words):
datalist_pre=[]
for text in datalist:
text_words=word_tokenize(text)
text_words=[word for word in text_words if not word in stop_words]
text_words=[word for word in text_words if len(re.findall("'",word))==0]
datalist_pre.append(text_words)
return np.array(datalist_pre,dtype=object)
stop_words=stopwords.words("english")
stop_words=set(stop_words)
train_text_pre2=stop_stem_word(train_text_pre,stop_words)
test_text_pre2=stop_stem_word(test_text_pre,stop_words)
print(train_text_pre[10000])
print("="*10)
print(train_text_pre2[10000])
texts=[" ".join(words) for words in train_text_pre2]
traindatasave=pd.DataFrame({"text":texts,
"label":train_label})
texts=[" ".join(words) for words in test_text_pre2]
testdatasave=pd.DataFrame({"text":texts,
"label":test_label})
traindatasave.to_csv("data/chap6/imdb_train.csv",index=False)
testdatasave.to_csv("data/chap6/imdb_test.csv",index=False)
traindata=pd.DataFrame({"train_text":train_text,"train_word":train_text_pre2,"train_label":train_label})
train_word_num=[len(text) for text in train_text_pre2]
traindata["train_word_num"]=train_word_num
plt.figure(figsize=(8,5))
_=plt.hist(train_word_num,bins=100)
plt.xlabel("word number")
plt.ylabel("Freq")
plt.show()
plt.figure(figsize=(16,10))
for ii in np.unique(train_label):
text=np.array(traindata.train_word[traindata.train_label == ii])
text=" ".join(np.concatenate(text))
plt.subplot(1,2,ii+1)
wordcod=WordCloud(width=900,height=400,background_color="white")  # word cloud for this sentiment class
wordcod.generate_from_text(text)
plt.imshow(wordcod)
plt.axis("off")
if ii==1:
plt.title("Positive")
else:
plt.title("Negative")
plt.subplots_adjust(wspace=0.05)
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import seaborn as sns
from wordcloud import WordCloud
import time
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import data
from torchvision import transforms
from torchtext.vocab import Vectors,GloVe
mytokenize=lambda x:x.split()
TEXT=data.Field(sequential=True,tokenize=mytokenize,include_lengths=True,use_vocab=True,batch_first=True,fix_length=200)
LABEL=data.Field(sequential=False,use_vocab=False,pad_token=None,unk_token=None)
train_test_fields=[
("label",LABEL),
("text",TEXT)
]
traindata,testdata=data.TabularDataset.splits(
path="./data/chap6",format="csv",
train="imdb_train.csv",fields=train_test_fields,
test="imdb_test.csv",skip_header=True
)
len(traindata),len(testdata)
train_data,val_data=traindata.split(split_ratio=0.7)
len(train_data),len(val_data)
vec=Vectors("glove.6B.100d.txt","./data")
TEXT.build_vocab(train_data,max_size=20000,vectors=vec)
LABEL.build_vocab(train_data)
print(TEXT.vocab.freqs.most_common(n=10))
print("词典的词数 : ",len(TEXT.vocab.itos))
print("前 10 个单词: \n",TEXT.vocab.itos[0:10])
print("类别标签情况: ",LABEL.vocab.freqs)
BATCH_SIZE=32
train_iter=data.BucketIterator(train_data,batch_size=BATCH_SIZE)
val_iter=data.BucketIterator(val_data,batch_size=BATCH_SIZE)
test_iter=data.BucketIterator(testdata,batch_size=BATCH_SIZE)
for step,batch in enumerate(train_iter):
if step>0:
break
print("数据的尺寸: ",batch.text[0].shape)
print("数据的类别标签: \n"batch.lable)
print("数据样本数 : ",len(batch.text[1]))
class CNN_Text(nn.Module):
def __init__(self,vocab_size,embedding_dim,n_filters,filter_sizes,output_dim,dropout,pad_idx):
super().__init__()
self.embedding=nn.Embedding(vocab_size,embedding_dim,padding_idx=pad_idx)
self.convs=nn.ModuleList([
nn.Conv2d(in_channels=1,out_channels=n_filters,kernel_size=(fs,embedding_dim)) for fs in filter_sizes
])
self.fc=nn.Linear(len(filter_sizes)*n_filters,output_dim)
self.dropout=nn.Dropout(dropout)
def forward(self,text):
embedded=self.embedding(text)
embedded=embedded.unsqueeze(1)
conved=[F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
pooled=[F.max_pool1d(conv,conv.shape[2]).squeeze(2) for conv in conved]
cat=self.dropout(torch.cat(pooled,dim=1))
return self.fc(cat)
INPUT_DIM=len(TEXT.vocab)
EMBEDDING_DIM=100
N_FILTERS=100
FILTER_SIZES=[3,4,5]
OUTPUT_DIM=1
DROPOUT=0.5
PAD_IDX=TEXT.vocab.stoi[TEXT.pad_token]
model=CNN_Text(INPUT_DIM,EMBEDDING_DIM,N_FILTERS,FILTER_SIZES,OUTPUT_DIM,DROPOUT,PAD_IDX)
model
pretrained_embeddings=TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX=TEXT.vocab.stoi[TEXT.unk_token]
model.embedding.weight.data[UNK_IDX]=torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX]=torch.zeros(EMBEDDING_DIM)
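A quick shape check of the TextCNN defined above: the convolutions over windows of 3, 4 and 5 words are max-pooled and concatenated into a 300-dimensional vector before the final linear layer. The dummy batch of token indices (4 sentences of 200 tokens, matching fix_length=200) is purely illustrative.
dummy_tokens=torch.randint(0,INPUT_DIM,(4,200))  # batch_first token indices
print(model(dummy_tokens).shape)  # torch.Size([4, 1]) -> one raw logit per review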
optimizer=optim.Adam(model.parameters())
criterion=nn.BCEWithLogitsLoss()
def train_epoch(model,iterator,optimizer,criterion):
epoch_loss=0
epoch_acc=0
train_corrects=0
train_num=0
model.train()
for batch in iterator:
optimizer.zero_grad()
pre=model(batch.text[0]).squeeze(1)
loss=criterion(pre,batch.label.type(torch.FloatTensor))
pre_lab=torch.round(torch.sigmoid(pre))
train_corrects+=torch.sum(pre_lab.long()==batch.label)
train_num+=len(batch.label)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_loss=epoch_loss/train_num
epoch_acc=train_corrects.double().item()/train_num
return epoch_loss,epoch_acc
def evaluate(model,iterator,criterion):
epoch_loss=0
epoch_acc=0
train_corrects=0
train_num=0
model.eval()
with torch.no_grad():
for batch in iterator:
pre=model(batch.text[0]).squeeze(1)
loss=criterion(pre,batch.label.type(torch.FloatTensor))
pre_lab=torch.round(torch.sigmoid(pre))
train_corrects+=torch.sum(pre_lab.long() == batch.label)
train_num+=len(batch.label)
epoch_loss+=loss.item()
epoch_loss = epoch_loss/train_num
epoch_acc = train_corrects.double().item()/train_num
return epoch_loss,epoch_acc
EPOCHS=10
best_val_loss=float("inf")
best_acc=float(0)
for epoch in range(EPOCHS):
start_time=time.time()
train_loss,train_acc=train_epoch(model,train_iter,optimizer,criterion)
val_loss,val_acc=evaluate(model,val_iter,criterion)
end_time=time.time()
print("Epoch: ",epoch+1,"|","EPoch TIme: ",end_time-start_time,"s")
print("Train Loss: "train_loss,"|","Train acc: ",train_acc)
print("Val.Loss: ",val_loss,"|","Val.Acc: ",val_acc)
if(val_loss<best_val_loss) & (val_acc>best_acc):
best_model_wts=copy.deepcopy(model.state_dict())
best_val_loss=val_loss
best_acc=val_acc
model.load_state_dict(best_model_wts)
6. Using a pretrained convolutional network
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torchvision import transforms
from PIL import Image
vgg16=models.vgg16(pretrained=True)
im=Image.open("data/chess.jpg")
imarray=np.asarray(im)/255.0
plt.figure()
plt.imshow(imarray)
plt.show()
data_transforms=transforms.Compose([
transforms.Resize((224,224)),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
input_im=data_transforms(im).unsqueeze(0)
print("input_im.shape:",input_im.shape)
activation={}  # feature maps captured by the forward hooks, keyed by name
def get_activation(name):
def hook(model,input,output):
activation[name]=output.detach()
return hook
vgg16.features[4].register_forward_hook(get_activation("maxpool1"))
_=vgg16(input_im)
maxpool1=activation["maxpool1"]
print("获取特征的尺寸为: ",maxpool1.shape())
plt.figure(figsize=(11,6))
for ii in range(maxpool1.shape[1]):
plt.subplot(6,11,ii+1)
plt.imshow(maxpool1.data.numpy()[0,ii,:,:],cmap="gray")
plt.axis("off")
plt.subplots_adjust(wspace=0.1,hspace=0.1)
plt.show()
vgg16.eval()
vgg16.features[21].register_forward_hook(get_activation("layer21_conv"))
_=vgg16(input_im)
layer21_conv=activation["layer21_conv"]
print("获取特征的尺寸为:" layer21_conv.shape)
plt.figure(figsize=(12,6))
for ii in range(72):
plt.subplot(6,12,ii+1)
plt.imshow(layer21_conv.data.numpy()[0,ii,:,:],cmap="gray")
plt.axis("off")
plt.subplots_adjust(wspace=0.1,hspace=0.1)
plt.show()
LABELS_URL="https://s3.amazonaws.com/outcome-blog/imagenet/labels.json"
response=requests.get(LABELS_URL)
labels={int(key):value for key,value in response.json().items()}
vgg16.eval()
im_pre=vgg16(input_im)
softmax=nn.Softmax(dim=1)
im_pre_prob=softmax(im_pre)
prob,prelab=torch.topk(im_pre_prob,5)
prob=prob.data.numpy().flatten()
prelab=prelab.numpy().flatten()
for ii,lab in enumerate(prelab):
print("index: ",lab," label: ",labels[lab]," ||",prob[ii])
#### Grad-CAM heatmap
class MyVgg16(nn.Module):
def __init__(self):
super(MyVgg16, self).__init__()
self.vgg=models.vgg16(pretrained=True)
self.features_conv=self.vgg.features[:30]
self.max_pool=self.vgg.features[30]
self.avgpool=self.vgg.avgpool
self.classifier=self.vgg.classifier
self.gradients=None
def activations_hook(self,grad):
self.gradients=grad
def forward(self,x):
x=self.features_conv(x)
h=x.register_hook(self.activations_hook)
x=self.max_pool(x)
x=self.avgpool(x)
x=x.view(1,-1)
x=self.classifier(x)
return x
def get_activations_gradient(self):
return self.gradients
def get_activations(self,x):
return self.features_conv(x)
vggcam=MyVgg16()
vggcam.eval()
im_pre=vggcam(input_im)
softmax=nn.Softmax(dim=1)
im_pre_prob=softmax(im_pre)
prob,prelab=torch.topk(im_pre_prob,5)
prob=prob.data.numpy().flatten()
prelab=prelab.numpy().flatten()
for ii,lab in enumerate(prelab):
print("index: ",lab,"label: ",labels[lab]," || ",prob[ii])
im_pre[:,prelab[0]].backward()
gradients=vggcam.get_activations_gradient()
mean_gradients=torch.mean(gradients,dim=[0,2,3])
activations=vggcam.get_activations(input_im).detach()
for i in range(len(mean_gradients)):
activations[:,i,:,:]*=mean_gradients[i]
heatmap=torch.mean(activations,dim=1).squeeze()
heatmap=F.relu(heatmap)
heatmap/=torch.max(heatmap)
heatmap=heatmap.numpy()
plt.matshow(heatmap)
img=cv2.imread("data/chess.jpg")  # the same image that was fed to the network above
heatmap=cv2.resize(heatmap,(img.shape[1],img.shape[0]))
heatmap=np.uint8(255*heatmap)
heatmap=cv2.applyColorMap(heatmap,cv2.COLORMAP_JET)
Grad_cam_img=heatmap*0.4+img
Grad_cam_img=Grad_cam_img/Grad_cam_img.max()
b,g,r=cv2.split(Grad_cam_img)
Grad_cam_img=cv2.merge([r,g,b])
plt.figure()
plt.imshow(Grad_cam_img)
plt.show()