Fun with Deep Learning | 07 Generative Adversarial Networks
Besides the VAE, the Generative Adversarial Network (GAN) is another very popular unsupervised generative model.

A GAN consists of two core networks:

- Generator: denoted G. By learning from a large number of samples, it becomes able to produce samples that pass for real ones, similar in spirit to a VAE.
- Discriminator: denoted D. It receives both real samples and samples produced by G, and learns to tell them apart.
- G and D play a game against each other: through training, G's generative ability and D's discriminative ability both grow stronger until the two converge.

Training a GAN is notoriously difficult, and many details need careful attention before it produces high-quality images.
Here we take MNIST as an example and implement a GAN with TensorFlow. Because the model is built from deep convolutional neural networks, it is also known as a DCGAN (Deep Convolutional GAN).

Principle
Given a noise vector z drawn from some prior distribution, the generator maps it to a fake sample through a complex mapping function:

$$\hat{x} = G(z; \theta_g)$$

The discriminator uses another complex mapping function: given a real or a generated sample, it outputs a value between 0 and 1, where larger values mean the sample is more likely to be real:

$$s = D(x; \theta_d)$$

The overall objective is the following minimax game:

$$\min_{G}\max_{D} V(D,G)=\mathbb{E}_{x\sim p_{data}(x)}[\log D(x)] + \mathbb{E}_{z\sim p_z(z)}[\log(1-D(G(z)))]$$
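One practical detail, which the implementation below follows: early in training, D rejects generated samples with high confidence, so the $\log(1 - D(G(z)))$ term saturates and gives G almost no gradient. The original GAN paper therefore suggests training G with the non-saturating objective

$$\max_{G}\ \mathbb{E}_{z\sim p_z(z)}[\log D(G(z))]$$

which is exactly what the loss_g defined later computes (a cross-entropy against all-ones labels).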
Implementation
Load the libraries:

```python
# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# notebook magic; drop the next line when running outside Jupyter
%matplotlib inline
import os, imageio
```
Load the data:

```python
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('MNIST_data')
```
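As a quick, illustrative check of what this helper returns (values follow from the standard MNIST split that read_data_sets uses):

```python
print(mnist.train.num_examples)   # 55000 training images
print(mnist.train.images.shape)   # (55000, 784), pixel values already scaled to [0, 1]
```

Note that this input_data helper ships with TensorFlow 1.x and was removed in TensorFlow 2, so the code in this article assumes a 1.x environment.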
Define some constants, the network inputs, and a few helper functions:

```python
batch_size = 100
z_dim = 100
OUTPUT_DIR = 'samples'
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

X = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1], name='X')
noise = tf.placeholder(dtype=tf.float32, shape=[None, z_dim], name='noise')
is_training = tf.placeholder(dtype=tf.bool, name='is_training')

# leaky ReLU: keeps positive values, scales negative ones by the leak factor
def lrelu(x, leak=0.2):
    return tf.maximum(x, leak * x)

def sigmoid_cross_entropy_with_logits(x, y):
    return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y)
```
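A throwaway sanity check of the leaky ReLU (hypothetical snippet, not part of the pipeline):

```python
with tf.Session() as check_sess:
    print(check_sess.run(lrelu(tf.constant([-3.0, 0.0, 3.0]))))
    # [-0.6  0.   3. ]
```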
The discriminator:

```python
def discriminator(image, reuse=None, is_training=is_training):
    momentum = 0.9
    with tf.variable_scope('discriminator', reuse=reuse):
        # 28x28x1 -> 14x14x64
        h0 = lrelu(tf.layers.conv2d(image, kernel_size=5, filters=64, strides=2, padding='same'))

        # 14x14x64 -> 7x7x128
        h1 = tf.layers.conv2d(h0, kernel_size=5, filters=128, strides=2, padding='same')
        h1 = lrelu(tf.contrib.layers.batch_norm(h1, is_training=is_training, decay=momentum))

        # 7x7x128 -> 4x4x256
        h2 = tf.layers.conv2d(h1, kernel_size=5, filters=256, strides=2, padding='same')
        h2 = lrelu(tf.contrib.layers.batch_norm(h2, is_training=is_training, decay=momentum))

        # 4x4x256 -> 2x2x512
        h3 = tf.layers.conv2d(h2, kernel_size=5, filters=512, strides=2, padding='same')
        h3 = lrelu(tf.contrib.layers.batch_norm(h3, is_training=is_training, decay=momentum))

        # flatten to 2048 features, then a single logit per image
        h4 = tf.contrib.layers.flatten(h3)
        h4 = tf.layers.dense(h4, units=1)
        return tf.nn.sigmoid(h4), h4
```
The generator:

```python
def generator(z, is_training=is_training):
    momentum = 0.9
    with tf.variable_scope('generator', reuse=None):
        # project the noise to a 3x3x512 feature map
        d = 3
        h0 = tf.layers.dense(z, units=d * d * 512)
        h0 = tf.reshape(h0, shape=[-1, d, d, 512])
        h0 = tf.nn.relu(tf.contrib.layers.batch_norm(h0, is_training=is_training, decay=momentum))

        # 3x3x512 -> 6x6x256
        h1 = tf.layers.conv2d_transpose(h0, kernel_size=5, filters=256, strides=2, padding='same')
        h1 = tf.nn.relu(tf.contrib.layers.batch_norm(h1, is_training=is_training, decay=momentum))

        # 6x6x256 -> 12x12x128
        h2 = tf.layers.conv2d_transpose(h1, kernel_size=5, filters=128, strides=2, padding='same')
        h2 = tf.nn.relu(tf.contrib.layers.batch_norm(h2, is_training=is_training, decay=momentum))

        # 12x12x128 -> 24x24x64
        h3 = tf.layers.conv2d_transpose(h2, kernel_size=5, filters=64, strides=2, padding='same')
        h3 = tf.nn.relu(tf.contrib.layers.batch_norm(h3, is_training=is_training, decay=momentum))

        # 24x24x64 -> 28x28x1, tanh squashes the output into [-1, 1]
        h4 = tf.layers.conv2d_transpose(h3, kernel_size=5, filters=1, strides=1,
                                        padding='valid', activation=tf.nn.tanh, name='g')
        return h4
```
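The choice d = 3 is not arbitrary: three stride-2 upsamplings turn 3 into 24, and the final stride-1 'valid' transposed convolution with a 5x5 kernel brings that to exactly 28. A throwaway sketch of the arithmetic, using TF's output-size formulas for conv2d_transpose:

```python
# Output sizes of tf.layers.conv2d_transpose:
#   padding='same':  out = in * stride
#   padding='valid': out = (in - 1) * stride + kernel
def deconv_same(size, stride):
    return size * stride

def deconv_valid(size, stride, kernel):
    return (size - 1) * stride + kernel

s = 3                       # d = 3: the initial 3x3x512 feature map
s = deconv_same(s, 2)       # h1: 6
s = deconv_same(s, 2)       # h2: 12
s = deconv_same(s, 2)       # h3: 24
s = deconv_valid(s, 1, 5)   # h4: 28
print(s)                    # 28, exactly the MNIST image size
```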
Define the loss functions. Note that the discriminator is instantiated twice here (once for real and once for generated samples), but the two instances share their parameters via reuse=True:

```python
g = generator(noise)
d_real, d_real_logits = discriminator(X)
d_fake, d_fake_logits = discriminator(g, reuse=True)

vars_g = [var for var in tf.trainable_variables() if var.name.startswith('generator')]
vars_d = [var for var in tf.trainable_variables() if var.name.startswith('discriminator')]

# D should classify real samples as 1 and generated ones as 0
loss_d_real = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_real_logits, tf.ones_like(d_real)))
loss_d_fake = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake_logits, tf.zeros_like(d_fake)))
# non-saturating generator loss: G wants its samples classified as 1
loss_g = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake_logits, tf.ones_like(d_fake)))
loss_d = loss_d_real + loss_d_fake
```
Define the optimizers. Each loss must be minimized only with respect to its own network's variables (via var_list), and the control dependency on UPDATE_OPS ensures the batch-normalization moving averages are updated during training:

```python
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer_d = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_d, var_list=vars_d)
    optimizer_g = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_g, var_list=vars_g)
```
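The learning rate of 0.0002 and beta1 = 0.5 are the settings recommended in the DCGAN paper (Radford et al.); with Adam's default beta1 = 0.9, DCGAN training tends to be noticeably less stable.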
Define a helper that tiles multiple images into one grid for display:

```python
def montage(images):
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    # side length of the (square) grid
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    # gray canvas with a 1-pixel border around each tile
    m = np.ones((images.shape[1] * n_plots + n_plots + 1,
                 images.shape[2] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img
    return m
```
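A quick smoke test with made-up data, just to see the layout: nine random 28x28 "images" should come back as a 3x3 grid with one-pixel borders, i.e. 3 * 28 + 3 + 1 = 88 pixels on each side.

```python
fake_batch = np.random.uniform(0, 1, [9, 28, 28])
grid = montage(fake_batch)
print(grid.shape)  # (88, 88)
```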
Start training. Each iteration updates D once and G twice; training G more often than D is a common trick to keep the discriminator from overpowering the generator.

```python
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# fixed noise vectors, reused at every checkpoint so the samples stay comparable
z_samples = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
samples = []
loss = {'d': [], 'g': []}

for i in range(60000):
    n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
    batch = mnist.train.next_batch(batch_size=batch_size)[0]
    batch = np.reshape(batch, [-1, 28, 28, 1])
    # rescale pixels from [0, 1] to [-1, 1] to match the generator's tanh output
    batch = (batch - 0.5) * 2

    d_ls, g_ls = sess.run([loss_d, loss_g], feed_dict={X: batch, noise: n, is_training: True})
    loss['d'].append(d_ls)
    loss['g'].append(g_ls)

    # one discriminator step, then two generator steps
    sess.run(optimizer_d, feed_dict={X: batch, noise: n, is_training: True})
    sess.run(optimizer_g, feed_dict={X: batch, noise: n, is_training: True})
    sess.run(optimizer_g, feed_dict={X: batch, noise: n, is_training: True})

    if i % 1000 == 0:
        print(i, d_ls, g_ls)
        gen_imgs = sess.run(g, feed_dict={noise: z_samples, is_training: False})
        gen_imgs = (gen_imgs + 1) / 2  # map tanh output back to [0, 1]
        imgs = [img[:, :, 0] for img in gen_imgs]
        gen_imgs = montage(imgs)
        plt.axis('off')
        plt.imshow(gen_imgs, cmap='gray')
        plt.savefig(os.path.join(OUTPUT_DIR, 'sample_%d.jpg' % i))
        plt.show()
        samples.append(gen_imgs)

plt.plot(loss['d'], label='Discriminator')
plt.plot(loss['g'], label='Generator')
plt.legend(loc='upper right')
plt.savefig('Loss.png')
plt.show()
imageio.mimsave(os.path.join(OUTPUT_DIR, 'samples.gif'), samples, fps=5)
```
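One small caveat: samples holds float arrays in [0, 1], and depending on your imageio version, mimsave may warn about (or reject) float input. If that happens, converting the frames to uint8 along these lines should help (a hedged workaround, not part of the original code):

```python
frames = [(s * 255).astype(np.uint8) for s in samples]
imageio.mimsave(os.path.join(OUTPUT_DIR, 'samples.gif'), frames, fps=5)
```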
The generated images are shown below. Because the loss function involves no pixel-wise comparison, the edges of the digits do not come out blurry.

Save the model for later use:

```python
saver = tf.train.Saver()
saver.save(sess, './mnist_dcgan', global_step=60000)
```
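This writes the standard tf.train.Saver files into the working directory (checkpoint, mnist_dcgan-60000.meta, mnist_dcgan-60000.index and mnist_dcgan-60000.data-00000-of-00001); the loading snippet below relies on the .meta file and the checkpoint record.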
Load the model when needed, for example to generate samples on a standalone machine:

```python
# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

batch_size = 100
z_dim = 100

def montage(images):
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    m = np.ones((images.shape[1] * n_plots + n_plots + 1,
                 images.shape[2] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img
    return m

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# rebuild the graph from the .meta file and restore the trained weights
saver = tf.train.import_meta_graph('./mnist_dcgan-60000.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
graph = tf.get_default_graph()
g = graph.get_tensor_by_name('generator/g/Tanh:0')
noise = graph.get_tensor_by_name('noise:0')
is_training = graph.get_tensor_by_name('is_training:0')

n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32)
gen_imgs = sess.run(g, feed_dict={noise: n, is_training: False})
gen_imgs = (gen_imgs + 1) / 2
imgs = [img[:, :, 0] for img in gen_imgs]
gen_imgs = montage(imgs)
plt.axis('off')
plt.imshow(gen_imgs, cmap='gray')
plt.show()
```