FaceNet
- Model (PyTorch version)
"FaceNet: A Unified Embedding for Face Recognition and Clustering"
Authors: Florian Schroff, Dmitry Kalenichenko, James Philbin
Affiliation: Google
Venue: CVPR 2015
Face recognition differs quite a bit from conventional deep-learning image recognition.
Note: the paper's core approach and loss function are still mainstream choices in industry.
Note: on the face-recognition leaderboards, FaceNet already reaches about 99.6% accuracy (on LFW); many later networks only push this up to around 99.8%.
1. Paper Overview
- The main task of face recognition
Face recognition splits into closed-set and open-set settings; the difference is whether the identities to be recognized are fixed in advance. Closed-set recognition reduces to an ordinary classification problem, while open-set recognition cannot be handled by the classification idea alone (a minimal sketch follows below).
The main difficulty of face recognition is how to handle the open-set case.
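To make the distinction concrete, here is a minimal sketch with a made-up gallery of 10 known identities and a randomly initialized linear classifier standing in for a trained model; it shows why a pure classifier is stuck in the closed-set world:

```python
import torch
import torch.nn as nn

# Hypothetical closed-set setup: a classifier over a fixed gallery of 10 known identities.
num_known_identities = 10
classifier = nn.Sequential(nn.Flatten(), nn.Linear(3 * 112 * 112, num_known_identities))

face = torch.randn(1, 3, 112, 112)         # a face crop (random stand-in)
logits = classifier(face)
predicted_id = logits.argmax(dim=1)        # closed-set answer: always one of the 10 known IDs

# Open-set problem: a face of a person *not* in the gallery is still forced onto one of the
# 10 known IDs, so classification alone cannot answer "this person is not in the gallery".
print(predicted_id.item())
```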
- Evolution of face recognition methods:
Trend: hand-crafted features → deep-learning feature representations
Challenges of face recognition:
- Deep learning is also called representation learning
It maps a face image to a vector (an embedding).
The figure below shows the simplest comparison: plain Euclidean distance between embeddings.
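A minimal sketch of that idea, using random tensors as stand-ins for the embeddings a trained network would produce; the embeddings are L2-normalized as in FaceNet, and the decision threshold is a made-up value that would normally be tuned on a validation set:

```python
import torch
import torch.nn.functional as F

# Stand-ins for embeddings f(x1), f(x2) of two face crops from a trained network
emb1 = F.normalize(torch.randn(1, 128), p=2, dim=1)   # L2-normalized 128-D embedding
emb2 = F.normalize(torch.randn(1, 128), p=2, dim=1)

# Squared Euclidean distance between the two embeddings
dist = (emb1 - emb2).pow(2).sum(dim=1)

threshold = 1.1                     # hypothetical verification threshold
same_person = dist < threshold      # small distance -> same identity
print(dist.item(), same_person.item())
```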
2007 - LFW: this dataset raised the curtain on deep-learning face recognition.
2014 - CASIA-WebFace: the first dataset to provide large-scale training samples for modern deep-learning face recognition.
Today: MS-Celeb-1M and MegaFace are the main training datasets, while verification is carried out on benchmarks such as CALFW (listed on the left of the figure).
- Two mainstream deep-learning approaches:
FaceNet belongs to the left-hand branch in the figure: it learns the embedding directly with a metric-learning loss rather than training a classifier.
- Loss functions (a minimal triplet-loss sketch follows after this list)
- Model leaderboard
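The loss FaceNet itself uses is the triplet loss: pull an anchor toward a positive sample of the same identity and push it away from a negative sample of another identity by at least a margin α. A minimal PyTorch sketch of that formula (the margin value and the mean reduction are illustrative choices, not taken from the repository below):

```python
import torch.nn as nn
import torch.nn.functional as F

class TripletLoss(nn.Module):
    """Triplet loss as in the FaceNet paper:
    L = max(0, ||f(a) - f(p)||^2 - ||f(a) - f(n)||^2 + margin)."""

    def __init__(self, margin=0.2):
        super(TripletLoss, self).__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        d_ap = (anchor - positive).pow(2).sum(1)   # squared distance anchor <-> positive
        d_an = (anchor - negative).pow(2).sum(1)   # squared distance anchor <-> negative
        return F.relu(d_ap - d_an + self.margin).mean()
```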
2. Close Reading of the Paper
FaceNet raises two main problems:
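One issue the paper discusses at length is triplet selection: random triplets mostly already satisfy the margin and give no gradient, so FaceNet mines triplets online inside each mini-batch and prefers semi-hard negatives. A minimal sketch of that selection rule for a single anchor (all distances are made-up stand-ins):

```python
import torch

def semi_hard_negative(d_ap, d_an_all, margin=0.2):
    """Pick a negative that is farther from the anchor than the positive but still
    inside the margin: d(a, p) < d(a, n) < d(a, p) + margin (semi-hard, as in FaceNet).
    Falls back to the hardest (closest) negative if no semi-hard one exists."""
    mask = (d_an_all > d_ap) & (d_an_all < d_ap + margin)
    if mask.any():
        candidates = torch.nonzero(mask, as_tuple=False).squeeze(1)
        return candidates[torch.argmin(d_an_all[candidates])]
    return torch.argmin(d_an_all)

# Stand-in distances for one anchor: to its positive, and to 8 negatives in the batch
d_ap = torch.tensor(0.5)
d_an_all = torch.rand(8) * 2.0
print('chosen negative index:', semi_hard_negative(d_ap, d_an_all).item())
```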
3. Code Implementation
Only part of the code is shown here:
train.py
```python
import torch
import numpy as np


def fit(train_loader, val_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, log_interval, metrics=[],
        start_epoch=0):
    """
    Loaders, model, loss function and metrics should work together for a given task,
    i.e. the model should be able to process the data output of the loaders, and the loss
    function should process the target output of the loaders and the outputs of the model.

    Examples: Classification: batch loader, classification model, NLL loss, accuracy metric
              Siamese network: Siamese loader, siamese model, contrastive loss
              Online triplet learning: batch loader, embedding model, online triplet loss
    """
    # Fast-forward the scheduler when resuming from start_epoch
    for epoch in range(0, start_epoch):
        scheduler.step()

    for epoch in range(start_epoch, n_epochs):
        scheduler.step()

        # Train stage
        train_loss, metrics = train_epoch(train_loader, model, loss_fn, optimizer, cuda, log_interval, metrics)

        message = 'Epoch: {}/{}. Train set: Average loss: {:.4f}'.format(epoch + 1, n_epochs, train_loss)
        for metric in metrics:
            message += '\t{}: {}'.format(metric.name(), metric.value())

        # Validation stage
        val_loss, metrics = test_epoch(val_loader, model, loss_fn, cuda, metrics)
        val_loss /= len(val_loader)

        message += '\nEpoch: {}/{}. Validation set: Average loss: {:.4f}'.format(epoch + 1, n_epochs, val_loss)
        for metric in metrics:
            message += '\t{}: {}'.format(metric.name(), metric.value())

        print(message)


def train_epoch(train_loader, model, loss_fn, optimizer, cuda, log_interval, metrics):
    for metric in metrics:
        metric.reset()

    model.train()
    losses = []
    total_loss = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        target = target if len(target) > 0 else None
        # Wrap single tensors in a tuple so models taking several inputs (pairs, triplets) also work
        if not type(data) in (tuple, list):
            data = (data,)
        if cuda:
            data = tuple(d.cuda() for d in data)
            if target is not None:
                target = target.cuda()

        optimizer.zero_grad()
        outputs = model(*data)

        if type(outputs) not in (tuple, list):
            outputs = (outputs,)

        loss_inputs = outputs
        if target is not None:
            target = (target,)
            loss_inputs += target

        loss_outputs = loss_fn(*loss_inputs)
        # Some losses return extra values (e.g. number of mined triplets); the loss itself is the first element
        loss = loss_outputs[0] if type(loss_outputs) in (tuple, list) else loss_outputs
        losses.append(loss.item())
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

        for metric in metrics:
            metric(outputs, target, loss_outputs)

        if batch_idx % log_interval == 0:
            message = 'Train: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                batch_idx * len(data[0]), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), np.mean(losses))
            for metric in metrics:
                message += '\t{}: {}'.format(metric.name(), metric.value())

            print(message)
            losses = []

    total_loss /= (batch_idx + 1)
    return total_loss, metrics


def test_epoch(val_loader, model, loss_fn, cuda, metrics):
    with torch.no_grad():
        for metric in metrics:
            metric.reset()
        model.eval()
        val_loss = 0
        for batch_idx, (data, target) in enumerate(val_loader):
            target = target if len(target) > 0 else None
            if not type(data) in (tuple, list):
                data = (data,)
            if cuda:
                data = tuple(d.cuda() for d in data)
                if target is not None:
                    target = target.cuda()

            outputs = model(*data)

            if type(outputs) not in (tuple, list):
                outputs = (outputs,)
            loss_inputs = outputs
            if target is not None:
                target = (target,)
                loss_inputs += target

            loss_outputs = loss_fn(*loss_inputs)
            loss = loss_outputs[0] if type(loss_outputs) in (tuple, list) else loss_outputs
            val_loss += loss.item()

            for metric in metrics:
                metric(outputs, target, loss_outputs)

    return val_loss, metrics
```
networks.py
```python
import torch.nn as nn
import torch.nn.functional as F


class EmbeddingNet(nn.Module):
    """Small demo CNN that maps a 1x28x28 image to a 2-D embedding."""

    def __init__(self):
        super(EmbeddingNet, self).__init__()
        self.convnet = nn.Sequential(nn.Conv2d(1, 32, 5), nn.PReLU(),
                                     nn.MaxPool2d(2, stride=2),
                                     nn.Conv2d(32, 64, 5), nn.PReLU(),
                                     nn.MaxPool2d(2, stride=2))

        self.fc = nn.Sequential(nn.Linear(64 * 4 * 4, 256),
                                nn.PReLU(),
                                nn.Linear(256, 256),
                                nn.PReLU(),
                                nn.Linear(256, 2)
                                )

    def forward(self, x):
        output = self.convnet(x)
        output = output.view(output.size()[0], -1)
        output = self.fc(output)
        return output

    def get_embedding(self, x):
        return self.forward(x)


class EmbeddingNetL2(EmbeddingNet):
    """Same as EmbeddingNet, but L2-normalizes the embedding (as FaceNet does)."""

    def __init__(self):
        super(EmbeddingNetL2, self).__init__()

    def forward(self, x):
        output = super(EmbeddingNetL2, self).forward(x)
        output /= output.pow(2).sum(1, keepdim=True).sqrt()
        return output

    def get_embedding(self, x):
        return self.forward(x)


class ClassificationNet(nn.Module):
    """Softmax classifier on top of an embedding network (the classification baseline)."""

    def __init__(self, embedding_net, n_classes):
        super(ClassificationNet, self).__init__()
        self.embedding_net = embedding_net
        self.n_classes = n_classes
        self.nonlinear = nn.PReLU()
        self.fc1 = nn.Linear(2, n_classes)

    def forward(self, x):
        output = self.embedding_net(x)
        output = self.nonlinear(output)
        scores = F.log_softmax(self.fc1(output), dim=-1)
        return scores

    def get_embedding(self, x):
        return self.nonlinear(self.embedding_net(x))


class SiameseNet(nn.Module):
    """Runs the same embedding network on a pair of inputs (for contrastive loss)."""

    def __init__(self, embedding_net):
        super(SiameseNet, self).__init__()
        self.embedding_net = embedding_net

    def forward(self, x1, x2):
        output1 = self.embedding_net(x1)
        output2 = self.embedding_net(x2)
        return output1, output2

    def get_embedding(self, x):
        return self.embedding_net(x)


class TripletNet(nn.Module):
    """Runs the same embedding network on anchor, positive and negative inputs (for triplet loss)."""

    def __init__(self, embedding_net):
        super(TripletNet, self).__init__()
        self.embedding_net = embedding_net

    def forward(self, x1, x2, x3):
        output1 = self.embedding_net(x1)
        output2 = self.embedding_net(x2)
        output3 = self.embedding_net(x3)
        return output1, output2, output3

    def get_embedding(self, x):
        return self.embedding_net(x)
```
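As a rough usage sketch of how these pieces fit together (not part of the original code): the dataset below is a random stand-in that yields (anchor, positive, negative) triplets with an empty target list, matching what fit/train_epoch expect, TripletLoss is the margin-based loss sketched earlier, and all hyperparameters are placeholder values.

```python
import torch
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

class RandomTripletDataset(Dataset):
    """Stand-in dataset yielding random 1x28x28 (anchor, positive, negative) triplets;
    a real pipeline would sample them from face crops grouped by identity."""

    def __len__(self):
        return 256

    def __getitem__(self, idx):
        return (torch.randn(1, 28, 28), torch.randn(1, 28, 28), torch.randn(1, 28, 28)), []

train_loader = DataLoader(RandomTripletDataset(), batch_size=32, shuffle=True)
val_loader = DataLoader(RandomTripletDataset(), batch_size=32, shuffle=False)

cuda = torch.cuda.is_available()
model = TripletNet(EmbeddingNet())
if cuda:
    model.cuda()

loss_fn = TripletLoss(margin=0.2)                         # the loss sketched earlier
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)

fit(train_loader, val_loader, model, loss_fn, optimizer, scheduler,
    n_epochs=2, cuda=cuda, log_interval=5)
```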
4. Questions for Reflection