Variational Autoencoder(變分自編碼器)

使用通用自編碼器的時候,首先由 encoder 將輸入壓縮為一個較小的編碼(form),然後由 decoder 將其轉換成對輸出的一個估計。如果目標只是簡單地重現輸入,這樣做效果很好;但若想生成新的物件就不太可行了,因為我們其實根本不知道這個網路所生成的編碼具體是什麼。雖然我們可以通過結果去對比不同的物件,但是要理解它內部的工作方式幾乎是不可能的,甚至有時候可能連輸入應該是什麼樣子的都不知道。

解決方法是反過來做,使用變分自編碼器(Variational Autoencoder,VAE):不去關注隱含向量實際服從什麼分佈,只需要告訴網路我們想讓這個分佈變成什麼樣子就行了。VAE 對隱層的輸出增加了約束(使其分佈接近標準正態分佈),而對隱層的取樣過程也能起到和一般 dropout 效果類似的正則化作用。

至於它名字中的「變分」,則來自變分推理(Variational Inference,VI)。變分推理的思想是最大化與資料點 x 相關聯的變分下界來訓練,即尋找一個容易處理的分佈 q(z),使得 q(z) 與目標分佈 p(z|x) 儘量接近,以便用 q(z) 來代替 p(z|x);分佈之間的「接近」程度採用 Kullback–Leibler divergence(KL 散度)來度量。


import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST dataset via the TF tutorial helper, using the 'MNIST_data'
# directory. Per the original note this is 28*28 single-channel image data;
# the training loop reshapes every sample to [28, 28] before feeding it.
mnist = input_data.read_data_sets('MNIST_data')


# Number of images requested from mnist.train.next_batch per training step.
batch_size = 64

# Graph inputs. X_in is the image fed to the encoder and Y is the
# reconstruction target; the training loop below feeds the same batch to both.
X_in = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name='X')
Y = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name='Y')
Y_flat = tf.reshape(Y, shape=[-1, 28 * 28])  # flattened target, used by the loss
# Passed to tf.nn.dropout as its keep probability (the fraction of units kept),
# not as a drop rate: 0.8 during training, 1.0 for evaluation and sampling.
keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name='keep_prob')

dec_in_channels = 1
n_latent = 8  # dimensionality of the latent vector z

# Shape of the feature map the decoder rebuilds before its transposed convs.
reshaped_dim = [-1, 7, 7, dec_in_channels]
# Width of the decoder's first dense layer (24 when dec_in_channels == 1).
inputs_decoder = 49 * dec_in_channels // 2


def lrelu(x, alpha=0.3):
    """Leaky ReLU: return the element-wise maximum of ``x`` and ``alpha * x``.

    Args:
        x: Input tensor.
        alpha: Slope applied to ``x`` in the second operand of the maximum.

    Returns:
        A tensor with the same shape as ``x``.
    """
    return tf.maximum(x, tf.multiply(x, alpha))


# Encoder
def encoder(X_in, keep_prob):
    """Map input images to a sampled latent vector and its distribution params.

    Three 64-filter convolutions (strides 2, 2, 1, each followed by dropout)
    feed two dense heads of width ``n_latent``.

    Args:
        X_in: Image tensor; reshaped here to ``[-1, 28, 28, 1]``.
        keep_prob: Keep probability handed to ``tf.nn.dropout``.

    Returns:
        A tuple ``(z, mn, sd)`` where ``mn`` is the latent mean, ``sd`` is the
        *log* of the latent standard deviation (it is exponentiated before
        use), and ``z = mn + epsilon * exp(sd)`` with ``epsilon`` drawn from a
        standard normal (the reparameterization trick).
    """
    activation = lrelu
    with tf.variable_scope("encoder", reuse=None):
        X = tf.reshape(X_in, shape=[-1, 28, 28, 1])
        x = tf.layers.conv2d(X, filters=64, kernel_size=4, strides=2,
                             padding='same', activation=activation)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d(x, filters=64, kernel_size=4, strides=2,
                             padding='same', activation=activation)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d(x, filters=64, kernel_size=4, strides=1,
                             padding='same', activation=activation)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.contrib.layers.flatten(x)

        mn = tf.layers.dense(x, units=n_latent)  # latent means
        # Log standard deviation: the dense output is treated as a
        # log-variance, and halving it gives log(sigma).
        sd = 0.5 * tf.layers.dense(x, units=n_latent)
        # One standard-normal sample per (example, latent dimension).
        epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_latent]))
        z = mn + tf.multiply(epsilon, tf.exp(sd))

        return z, mn, sd


# Decoder
def decoder(sampled_z, keep_prob):
    """Reconstruct 28x28 images from latent vectors.

    Two dense layers expand ``sampled_z`` to a ``reshaped_dim`` feature map,
    three 64-filter transposed convolutions (strides 2, 1, 1) process it, and
    a final sigmoid dense layer emits 28*28 pixel intensities.

    Args:
        sampled_z: Latent tensor whose last dimension is ``n_latent``.
        keep_prob: Keep probability handed to ``tf.nn.dropout``.

    Returns:
        A tensor of shape ``[-1, 28, 28]`` with values in (0, 1).
    """
    # The second dense layer must emit exactly as many units as the reshape
    # below consumes. The previous ``inputs_decoder * 2 + 1`` only matched
    # 7 * 7 * dec_in_channels by an integer-division accident when
    # dec_in_channels == 1; deriving it from reshaped_dim keeps them in sync.
    feature_map_units = reshaped_dim[1] * reshaped_dim[2] * reshaped_dim[3]
    with tf.variable_scope("decoder", reuse=None):
        x = tf.layers.dense(sampled_z, units=inputs_decoder, activation=lrelu)
        x = tf.layers.dense(x, units=feature_map_units, activation=lrelu)
        x = tf.reshape(x, reshaped_dim)
        x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=4, strides=2,
                                       padding='same', activation=tf.nn.relu)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=4, strides=1,
                                       padding='same', activation=tf.nn.relu)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=4, strides=1,
                                       padding='same', activation=tf.nn.relu)

        x = tf.contrib.layers.flatten(x)
        x = tf.layers.dense(x, units=28 * 28, activation=tf.nn.sigmoid)
        img = tf.reshape(x, shape=[-1, 28, 28])
        return img


# Wire the encoder and decoder together.
sampled, mn, sd = encoder(X_in, keep_prob)
dec = decoder(sampled, keep_prob)

# Loss: per-example reconstruction error plus KL regularizer.
unreshaped = tf.reshape(dec, [-1, 28 * 28])
img_loss = tf.reduce_sum(tf.squared_difference(unreshaped, Y_flat), 1)
# Closed-form KL(N(mn, exp(sd)^2) || N(0, 1)); 2.0 * sd is the log-variance.
latent_loss = -0.5 * tf.reduce_sum(
    1.0 + 2.0 * sd - tf.square(mn) - tf.exp(2.0 * sd), 1)
loss = tf.reduce_mean(img_loss + latent_loss)
optimizer = tf.train.AdamOptimizer(0.0005).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Training
for i in range(30000):
    batch = [np.reshape(b, [28, 28])
             for b in mnist.train.next_batch(batch_size=batch_size)[0]]
    sess.run(optimizer, feed_dict={X_in: batch, Y: batch, keep_prob: 0.8})

    # Every 200 steps, evaluate on the current batch with dropout disabled and
    # show the first input next to its reconstruction.
    if i % 200 == 0:
        ls, d, i_ls, d_ls, mu, sigm = sess.run(
            [loss, dec, img_loss, latent_loss, mn, sd],
            feed_dict={X_in: batch, Y: batch, keep_prob: 1.0})
        plt.imshow(np.reshape(batch[0], [28, 28]), cmap='gray')
        plt.show()
        plt.imshow(d[0], cmap='gray')
        plt.show()
        print(i, ls, np.mean(i_ls), np.mean(d_ls))

# Generate new characters: feed standard-normal samples straight into the
# `sampled` tensor, bypassing the encoder, and decode them.
randoms = [np.random.normal(0, 1, n_latent) for _ in range(10)]
imgs = sess.run(dec, feed_dict={sampled: randoms, keep_prob: 1.0})
imgs = [np.reshape(img, [28, 28]) for img in imgs]

for img in imgs:
    plt.figure(figsize=(1, 1))
    plt.axis('off')
    plt.imshow(img, cmap='gray')
# Without this the generated figures are never displayed when run as a script.
plt.show()




