1. 对抗网络GAN的网络结构
对抗网络包含了2个子网络:生成网络(Generator,G)和判别网络(Discriminator,D),其中生成网络负责学习样本的真实分布,判别网络负责将生成网络采样的样本与真实样本区分开来。
2. 对抗网络GAN的训练方法
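对于判别网络 D,它的目标是能够很好地分辨出真样本 $\boldsymbol{x}_r$ 与假样本 $\boldsymbol{x}_f$。以图片生成为例,它的目标是最小化图片的预测值和真实值之间的交叉熵损失函数:$\min_{\theta} \mathcal{L} = \mathrm{CrossEntropy}(D_{\theta}(\boldsymbol{x}_r), y_r, D_{\theta}(\boldsymbol{x}_f), y_f)$,其中真样本的标签 $y_r = 1$,假样本的标签 $y_f = 0$。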
对于生成网络 $G(\boldsymbol{z})$,我们希望 $\boldsymbol{x}_f = G(\boldsymbol{z})$ 能够很好地骗过判别网络 D,即假样本 $\boldsymbol{x}_f$ 在判别网络的输出越接近真实的标签越好。也就是说,在训练生成网络时,希望判别网络的输出 $D(G(\boldsymbol{z}))$ 越逼近 1 越好,此时的交叉熵损失函数为:
$\min_{\phi} \mathcal{L} = \mathrm{CrossEntropy}(D(G_{\phi}(\boldsymbol{z})), 1) = -\log D(G_{\phi}(\boldsymbol{z}))$
3. 常用的GAN衍生方法
(1)DCGAN
GAN网络:基于全连接层实现生成器和判别器,由于图片维度较高,网络参数巨大,训练效果不好
DCGAN网络:采用转置卷积层实现生成器,普通卷积层实现判别器,大大降低了网络参数量,同时图片的生成效果大幅提升。
(2)InfoGAN
使用无监督的方式学习输入x的可解释隐向量z表示方法,即希望隐向量z能够对应到数据的语义特征。
(3)CycleGAN
CycleGAN是无监督方式进行图片相互转换的算法。
(4)WGAN/WGAN-GP
从理论层面分析了原始的GAN使用JS散度存在的缺陷,并提出用Wasserstein距离来解决这个问题。
(5)Self-Attention GAN
基于自我注意力机制的GAN。
(6)BigGAN
利用正则化等技巧保证训练大数据过程的稳定性。
4. 对抗网络DCGAN样例
# 整合数据集
import tensorflow as tf
import glob
import os
import numpy as np
from PIL import Image
# Side length (in pixels) that every image is resized to, and the mini-batch size.
resize = 64
batch_size = 64


def preprocess(path):
    """Load one JPEG file and convert it to a normalized image tensor.

    The file at ``path`` is read, decoded with 3 channels, resized to
    ``resize`` x ``resize`` and rescaled from [0, 255] to [-1, 1].

    Args:
        path: path of the JPEG file to load.

    Returns:
        The resized image tensor with values in [-1, 1].
    """
    raw_bytes = tf.io.read_file(path)
    # Always decode to 3 channels so every image has the same depth.
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Resize, then clamp back into the valid pixel range.
    pixels = tf.clip_by_value(tf.image.resize(pixels, [resize, resize]), 0, 255)
    # Map [0, 255] onto [-1, 1].
    return pixels / 127.5 - 1
# Build the input pipeline from every JPEG in the faces directory.
img_paths = glob.glob('F:/tensorflow/TESTCode/faces/*.jpg')
if not img_paths:
    # Fail early with a clear message instead of an obscure error inside map().
    raise FileNotFoundError('No .jpg files found in F:/tensorflow/TESTCode/faces/')
dataset = tf.data.Dataset.from_tensor_slices(img_paths)
# Shuffle the file paths (cheap: only strings are buffered) so that the
# batches differ from one pass over the data to the next.
dataset = dataset.shuffle(len(img_paths))
dataset = dataset.map(preprocess)
dataset = dataset.batch(batch_size)
# Repeat indefinitely: training pulls batches with next() on a single
# iterator, which would otherwise raise StopIteration after one pass.
dataset = dataset.repeat()
print(dataset)
# 构建生成器网络和判别器网络
class Generator(tf.keras.Model):
    """DCGAN generator: maps a latent vector to a 3-channel image.

    The latent vector is reshaped to a 1x1 feature map and upsampled by five
    transposed convolutions (1 -> 4 -> 8 -> 16 -> 32 -> 64 pixels per side).
    The first four are followed by batch normalization and ReLU; the last one
    is followed by tanh, so outputs lie in [-1, 1].
    """

    def __init__(self):
        super(Generator, self).__init__()
        # Base channel count (named so it does not shadow the builtin `filter`).
        base_filters = 64
        # Transposed conv 1: kernel 4, stride 1, 'valid' padding (1x1 -> 4x4).
        self.conv1 = tf.keras.layers.Conv2DTranspose(base_filters * 8, 4, 1, padding='valid', use_bias=False)
        self.bn1 = tf.keras.layers.BatchNormalization()
        # Transposed conv 2: kernel 4, stride 2, 'same' padding (doubles the size).
        self.conv2 = tf.keras.layers.Conv2DTranspose(base_filters * 4, 4, 2, padding='same', use_bias=False)
        self.bn2 = tf.keras.layers.BatchNormalization()
        # Transposed conv 3.
        self.conv3 = tf.keras.layers.Conv2DTranspose(base_filters * 2, 4, 2, padding='same', use_bias=False)
        self.bn3 = tf.keras.layers.BatchNormalization()
        # Transposed conv 4.
        self.conv4 = tf.keras.layers.Conv2DTranspose(base_filters * 1, 4, 2, padding='same', use_bias=False)
        self.bn4 = tf.keras.layers.BatchNormalization()
        # Transposed conv 5: produces the 3 output channels.
        self.conv5 = tf.keras.layers.Conv2DTranspose(3, 4, 2, padding='same', use_bias=False)

    def call(self, inputs, training=None):
        """Generate images from latent vectors.

        Args:
            inputs: latent vectors; reshaped here as (batch, z_dim).
            training: forwarded to the batch-normalization layers.

        Returns:
            Image tensor after tanh, values in [-1, 1].
        """
        # Use -1 so the batch size is inferred; reading inputs.shape[0]
        # breaks when the batch dimension is unknown (None).
        x = tf.reshape(inputs, (-1, 1, 1, inputs.shape[1]))
        # NOTE(review): this ReLU zeroes the negative components of the latent
        # vector itself; kept to preserve existing behavior - confirm intent.
        x = tf.nn.relu(x)
        x = tf.nn.relu(self.bn1(self.conv1(x), training=training))
        x = tf.nn.relu(self.bn2(self.conv2(x), training=training))
        x = tf.nn.relu(self.bn3(self.conv3(x), training=training))
        x = tf.nn.relu(self.bn4(self.conv4(x), training=training))
        x = self.conv5(x)
        x = tf.tanh(x)
        return x
class Discriminator(tf.keras.Model):
    """DCGAN discriminator: maps an image batch to one raw logit per image.

    Five convolution + batch-norm + leaky-ReLU stages are followed by global
    average pooling and a single-unit dense layer. No sigmoid is applied here.
    """

    def __init__(self):
        super().__init__()
        width = 64  # base number of convolution filters
        conv = tf.keras.layers.Conv2D
        norm = tf.keras.layers.BatchNormalization

        # Stages 1-3: kernel 4, stride 2; the filter count doubles each stage.
        self.conv1 = conv(width, kernel_size=4, strides=2, padding='valid', use_bias=False)
        self.bn1 = norm()
        self.conv2 = conv(width * 2, kernel_size=4, strides=2, padding='valid', use_bias=False)
        self.bn2 = norm()
        self.conv3 = conv(width * 4, kernel_size=4, strides=2, padding='valid', use_bias=False)
        self.bn3 = norm()
        # Stages 4-5: kernel 3, stride 1.
        self.conv4 = conv(width * 8, kernel_size=3, strides=1, padding='valid', use_bias=False)
        self.bn4 = norm()
        self.conv5 = conv(width * 16, kernel_size=3, strides=1, padding='valid', use_bias=False)
        self.bn5 = norm()

        # Head: global average pooling, flatten, then a single-logit classifier.
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        self.flatten = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None):
        """Score a batch of images.

        Args:
            inputs: image batch fed to the first convolution.
            training: forwarded to the batch-normalization layers.

        Returns:
            The output of the final dense layer (raw logits).
        """
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
        )
        features = inputs
        for conv, norm in stages:
            features = tf.nn.leaky_relu(norm(conv(features), training=training))
        pooled = self.flatten(self.pool(features))
        return self.fc(pooled)
# 构建误差函数
def celoss_ones(logits):
    """Return the mean binary cross-entropy of ``logits`` against all-ones labels.

    ``logits`` are passed with ``from_logits=True``, i.e. as raw scores.
    """
    per_sample = tf.keras.losses.binary_crossentropy(
        tf.ones_like(logits), logits, from_logits=True
    )
    return tf.reduce_mean(per_sample)
def celoss_zeros(logits):
    """Return the mean binary cross-entropy of ``logits`` against all-zeros labels.

    ``logits`` are passed with ``from_logits=True``, i.e. as raw scores.
    """
    per_sample = tf.keras.losses.binary_crossentropy(
        tf.zeros_like(logits), logits, from_logits=True
    )
    return tf.reduce_mean(per_sample)
def d_loss_fn(generator, discriminator, batch_z, batch_x, is_training):
    """Compute the discriminator loss for one batch.

    Args:
        generator: model called on ``batch_z`` to produce fake images.
        discriminator: model that scores both fake and real images.
        batch_z: batch of latent vectors.
        batch_x: batch of real images.
        is_training: passed as the second positional argument of both models.

    Returns:
        Cross-entropy of the fake scores against 0 plus cross-entropy of the
        real scores against 1.
    """
    # Score freshly generated images, then the real ones.
    generated = generator(batch_z, is_training)
    fake_scores = discriminator(generated, is_training)
    real_scores = discriminator(batch_x, is_training)
    # Fake images should be labelled 0, real images 1.
    return celoss_zeros(fake_scores) + celoss_ones(real_scores)
def g_loss_fn(generator, discriminator, batch_z, is_training):
    """Compute the generator loss for one batch.

    Args:
        generator: model called on ``batch_z`` to produce fake images.
        discriminator: model that scores the fake images.
        batch_z: batch of latent vectors.
        is_training: passed as the second positional argument of both models.

    Returns:
        Cross-entropy of the discriminator's scores for the generated images
        against the label 1 (the generator wants its images judged real).
    """
    fake_scores = discriminator(generator(batch_z, is_training), is_training)
    return celoss_ones(fake_scores)
# 保存图片
def save_images(valout, name, grid=10):
    """Tile images into a ``grid`` x ``grid`` sheet and save it to ``name``.

    Args:
        valout: array of images shaped [N, H, W, 3] with values in [-1, 1],
            e.g. [100, 64, 64, 3].
        name: output file path handed to ``PIL.Image.save``.
        grid: number of tiles per row and per column. The default of 10
            reproduces the original 10 x 10 sheet (640 x 640 for 64-pixel
            tiles).

    Raises:
        ValueError: if ``valout`` is not a 4-D array.

    At most ``grid * grid`` images are drawn. If fewer are supplied, the
    remaining cells stay black instead of raising IndexError.
    """
    imgs = np.asarray(valout)
    if imgs.ndim != 4:
        raise ValueError('valout must have shape [N, H, W, 3], got %r' % (imgs.shape,))
    # Map [-1, 1] back to [0, 255]; clip first so out-of-range values
    # saturate instead of wrapping around in the uint8 cast.
    imgs = np.clip((imgs + 1.0) * 127.5, 0, 255).astype(np.uint8)

    # Derive the tile size from the data rather than assuming 64 x 64.
    tile_h, tile_w = imgs.shape[1], imgs.shape[2]
    sheet = Image.new('RGB', (grid * tile_w, grid * tile_h))
    for index, tile in enumerate(imgs[:grid * grid]):
        # Column-major fill, as before: consecutive images go down a column.
        col, row = divmod(index, grid)
        sheet.paste(Image.fromarray(tile, mode='RGB'), (col * tile_w, row * tile_h))
    sheet.save(name)
# ---- Training setup ----
# Hyper-parameters.
epochs = 300            # number of training iterations run by the loop below
z_dim = 100             # length of the latent vector fed to the generator
learning_rate = 0.0002
is_training = True

# Iterator that yields one batch of real images per next() call.
db_iter = iter(dataset)

# Instantiate both networks and build their weights with fixed input shapes.
generator = Generator()
generator.build(input_shape=(4, z_dim))
discriminator = Discriminator()
discriminator.build(input_shape=(4, 64, 64, 3))

# One Adam optimizer per network, both with identical settings.
g_optimizer, d_optimizer = (
    tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    for _ in range(2)
)

# To resume from saved weights, uncomment:
# generator.load_weights('generator.ckpt')
# discriminator.load_weights('discriminator.ckpt')
# print('Loaded chpt!!')

# Loss history of the discriminator and the generator.
d_losses = []
g_losses = []
# Main training loop. Each "epoch" here is a single iteration: one
# discriminator update followed by one generator update.
# NOTE(review): the original indentation was lost; the loss bookkeeping and
# checkpoint save are placed at loop level, because `epoch % 100 == 1` could
# never be true if nested under `epoch % 100 == 0` - confirm intended nesting.
for epoch in range(epochs):
    # 1. Train the discriminator.
    for _ in range(1):
        # Sample latent vectors and a batch of real images.
        batch_z = tf.random.normal([batch_size, z_dim])
        try:
            batch_x = next(db_iter)
        except StopIteration:
            # The dataset was exhausted: start another pass over it.
            db_iter = iter(dataset)
            batch_x = next(db_iter)
        if epoch % 100 == 0:
            print(batch_x.shape)
        # Discriminator forward pass.
        with tf.GradientTape() as tape:
            d_loss = d_loss_fn(generator, discriminator, batch_z, batch_x, is_training)
        if epoch % 100 == 0:
            print('d_loss: ', d_loss)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        # Fix: the discriminator must be updated by its own optimizer. The
        # original applied these gradients with g_optimizer, so d_optimizer
        # was never used.
        d_optimizer.apply_gradients(grads_and_vars=zip(grads, discriminator.trainable_variables))

    # 2. Train the generator. No real images are needed for this step, so
    # the batch that was previously fetched and discarded is no longer drawn.
    batch_z = tf.random.normal([batch_size, z_dim])
    with tf.GradientTape() as tape:
        g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
    grads = tape.gradient(g_loss, generator.trainable_variables)
    g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

    if epoch % 100 == 0:
        print(epoch, 'd_loss:', float(d_loss), 'g_loss:', float(g_loss))
        # Visualize: generate 100 samples in inference mode and save the sheet.
        z = tf.random.normal([100, z_dim])
        fake_image = generator(z, training=False)
        save_images(fake_image.numpy(), 'gan-%d.png' % epoch)

    d_losses.append(float(d_loss))
    g_losses.append(float(g_loss))

    if epoch % 100 == 1:
        generator.save_weights('generator.ckpt')
        discriminator.save_weights('discriminator.ckpt')

# Save the final weights as well: the periodic checkpoint above last fires at
# iteration 201, so the remaining iterations would otherwise be lost.
generator.save_weights('generator.ckpt')
discriminator.save_weights('discriminator.ckpt')