tensorflow 自編碼器
阿新 • • 發佈:2019-01-15
Autoencoders
- Autoencoder可以對輸入資料進行無監督的學習,coding得到的結果的維度往往很低,因此自編碼器可以用於資料的降維;自編碼器也可以用於特徵檢測;同時也可以生成許多與訓練資料相似的新資料,這可以被稱為生成模型(generative model)。
setup code
# 不顯示python使用過程中的警告
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
def reset_graph(seed=42):
    """Wipe the default TF graph and seed NumPy/TF for reproducible runs."""
    np.random.seed(seed)
    tf.reset_default_graph()
    tf.set_random_seed(seed)
# Sanity check: confirm a TF session can be created and run on this machine.
# with tf.Session( config=tf.ConfigProto(gpu_options=gpu_options) ) as sess:
with tf.Session( ) as sess:
    print( sess.run( tf.constant(1) ) )
1
資料表示形式
- 實際生活中,許多資料都是有很強的相關性,我們可以對其處理、降維等
- 自編碼器與多層感知器十分類似,唯一的區別就是:自編碼器的輸出與輸入的神經元節點個數是相同的;encoder-decoder串聯的話其實就相當於一個重建的過程,首先對輸入進行處理,然後對其進行復原。
- 自編碼器是對資料進行降維了,因此它是不完全的,在對資料進行表徵時,它選擇最重要的特徵並記憶,丟棄一些不重要的特徵
使用PCA對資料進行不完全的表示
- 如果對於autoencoder來說,啟用函式是線性的,同時loss是MSE,則autoencoder中的encoder等效於PCA。
# Linear autoencoder on a synthetic 3-D dataset. With linear activations and an
# MSE loss, the 2-unit hidden layer learns the same projection subspace as PCA.
import numpy.random as rnd
from sklearn.preprocessing import StandardScaler

rnd.seed(4)
m = 200            # number of samples
w1, w2 = 0.1, 0.3  # mixing weights for the (almost redundant) third axis
noise = 0.1
# Points along a noisy 3-D curve; axis 2 is a linear mix of axes 0 and 1.
angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5
data = np.empty((m, 3))
data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2
data[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2
data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * rnd.randn(m)

# Standardize; fit the scaler on the first 100 points only (train split).
scaler = StandardScaler()
X_train = scaler.fit_transform(data[:100])
X_test = scaler.transform(data[100:])

n_inputs = 3
n_hidden = 2               # coding dimension: project 3-D data down to 2-D
n_outputs = n_inputs       # output size equals input size — the defining autoencoder property
learning_rate = 0.01

X = tf.placeholder( tf.float32, shape=[None, n_inputs] )
hidden = tf.layers.dense( X, n_hidden, activation=None )         # linear encoder
outputs = tf.layers.dense( hidden, n_outputs, activation=None )  # linear decoder
reconstruction_loss = tf.reduce_mean( tf.square( outputs - X ) )  # MSE
optimizer = tf.train.AdamOptimizer( learning_rate )
training_op = optimizer.minimize( reconstruction_loss )
init = tf.global_variables_initializer()

n_iterations = 1000
codings = hidden  # the codes we care about are the hidden activations, not the outputs
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        training_op.run(feed_dict={X: X_train})
    codings_val = codings.eval(feed_dict={X: X_test})  # 2-D codings of the test set; outputs were only needed for the loss

# Scatter-plot the learned 2-D codings.
plt.figure(figsize=(4,3))
plt.plot(codings_val[:,0], codings_val[:, 1], "b.")
plt.xlabel("$z_1$", fontsize=18)
plt.ylabel("$z_2$", fontsize=18, rotation=0)
plt.show()
# 畫圖
def plot_image(image, shape=[28, 28]):
plt.imshow(image.reshape(shape), cmap="Greys", interpolation="nearest")
plt.axis("off")
return
def plot_multiple_images(images, n_rows, n_cols, pad=2):
    """Tile a batch of 2-D images into a single padded grid and display it.

    Args:
        images: array of shape (n_images, height, width); must hold at least
            n_rows * n_cols images.
        n_rows, n_cols: grid layout.
        pad: white border width (pixels) around each tile.
    """
    images = images - images.min()  # shift minimum to 0 so the padding renders white
    # NumPy image arrays are (height, width). The original code mixed w and h
    # (rows allocated with w but indexed with h, and vice versa), which only
    # worked because MNIST images are square; use them consistently instead.
    h, w = images.shape[1:]
    canvas = np.zeros(((h + pad) * n_rows + pad, (w + pad) * n_cols + pad))
    for y in range(n_rows):
        for x in range(n_cols):
            canvas[(y * (h + pad) + pad):(y * (h + pad) + pad + h),
                   (x * (w + pad) + pad):(x * (w + pad) + pad + w)] = images[y * n_cols + x]
    plt.imshow(canvas, cmap="Greys", interpolation="nearest")
    plt.axis("off")
棧式自編碼器(Stacked Autoencoders)
- 如果autoencoder含有很多層隱含層,我們可以稱為棧式自編碼器
- 棧式自編碼器一般都是有對稱的結構(相對於自編碼器的coding結果對稱)
# Stacked (multi-hidden-layer) autoencoder on MNIST with L2 weight regularization.
from functools import partial
import sys
from tensorflow.examples.tutorials.mnist import input_data

reset_graph()
mnist = input_data.read_data_sets("./dataset/mnist/")

n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 150        # codings layer
n_hidden3 = n_hidden1  # decoder mirrors the encoder
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0001

X = tf.placeholder( tf.float32, shape=[None, n_inputs])
he_init = tf.contrib.layers.variance_scaling_initializer()  # He initialization
l2_regularizer = tf.contrib.layers.l2_regularizer( l2_reg )
# Pre-bind the shared keyword arguments once, since every FC layer uses the same ones.
my_dense_layer = partial( tf.layers.dense, activation=tf.nn.relu, kernel_initializer=he_init, kernel_regularizer=l2_regularizer )
hidden1 = my_dense_layer( X, n_hidden1 )
hidden2 = my_dense_layer( hidden1, n_hidden2 )
hidden3 = my_dense_layer( hidden2, n_hidden3 )
# NOTE(review): the output layer inherits ReLU from my_dense_layer here;
# reconstruction layers usually use activation=None — confirm intent.
outputs = my_dense_layer( hidden3, n_outputs )
reconstruction_loss = tf.reduce_mean( tf.square( outputs - X ) )
# Collect the L2 penalties of all FC-layer weights (returned as a list).
reg_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES )
# Sum the reconstruction loss with all regularization losses — the training objective.
loss = tf.add_n([reconstruction_loss] + reg_losses)
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 10
batch_size = 500
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")  # in-place progress indicator
            sys.stdout.flush()
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})  # MSE on the last batch only
        print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./models/ae/stack_ae.ckpt")
Extracting ./dataset/mnist/train-images-idx3-ubyte.gz
Extracting ./dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ./dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ./dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train MSE: 0.043224763
1 Train MSE: 0.042281948
2 Train MSE: 0.041092556
3 Train MSE: 0.039506104
4 Train MSE: 0.04099624
5 Train MSE: 0.04035872
6 Train MSE: 0.039791297
7 Train MSE: 0.040091157
8 Train MSE: 0.042521376
9 Train MSE: 0.0400601
- 注:在使用tf.add_n()時,一直遇到:
TypeError: 'list' object is not callable
的問題,估計是一開始寫程式碼的時候定義了一個名為add_n的變數,把tf.add_n函式覆蓋了,重啟核心之後就可以了
# Minimal tf.add_n() check (the TypeError mentioned above came from a local
# variable shadowing the name, not from tf.add_n itself).
reset_graph()
a = tf.constant(1)
b = tf.constant(2)
c = tf.add_n( [a,b] )
with tf.Session() as sess:
    print( sess.run( c ) )
3
繫結權重
- 如果自編碼器被設計為對稱的結構,假設總共有 $N$ 層(不包括輸入層),第 $L$ 層的權重是 $W_L$,第 $1$ 層為隱含層,第 $N$ 層為輸出層,第 $N/2$ 層是coding層,則可以設計為 $W_{N-L+1} = W_L^{T}$ 的繫結權重(tied weights)方式
def show_reconstructed_digits(X, outputs, model_path = None, n_test_digits = 2):
    """Plot the first MNIST test digits beside their reconstructions.

    When model_path is given, trained weights are restored from that
    checkpoint first (relies on the module-level `saver` and `mnist`).
    Left column: original digit; right column: reconstruction.
    """
    with tf.Session() as sess:
        if model_path:
            saver.restore(sess, model_path)
        sample = mnist.test.images[:n_test_digits]
        recon = outputs.eval(feed_dict={X: sample})
        fig = plt.figure(figsize=(8, 3 * n_test_digits))
        for i in range(n_test_digits):
            plt.subplot(n_test_digits, 2, 2 * i + 1)
            plot_image(sample[i])
            plt.subplot(n_test_digits, 2, 2 * i + 2)
            plot_image(recon[i])

show_reconstructed_digits(X, outputs, "./models/ae/stack_ae.ckpt")
plt.show()
INFO:tensorflow:Restoring parameters from ./models/ae/stack_ae.ckpt
- 因為我們使用轉置的方法獲取對稱的隱含層的權重,因此無法使用
tf.layers.dense
,需要自己實現網路層的計算方式(線性計算+啟用函式)
# Stacked autoencoder with tied weights: the decoder weights are transposes of
# the encoder weights, so tf.layers.dense cannot be used — each layer is built
# by hand (matmul + bias + activation). Only weights1/weights2 are trainable.
reset_graph()
mnist = input_data.read_data_sets("./dataset/mnist/")

n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 150        # codings layer
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0005

activation = tf.nn.relu
regularizer = tf.contrib.layers.l2_regularizer( l2_reg )
initializer = tf.contrib.layers.variance_scaling_initializer()

X = tf.placeholder( tf.float32, shape=[None, n_inputs] )

# Free variables: only the encoder weights.
weights1_init = initializer( [n_inputs, n_hidden1] )
weights2_init = initializer( [n_hidden1, n_hidden2] )
weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
weights3 = tf.transpose(weights2, name="weights3") # tied weights
weights4 = tf.transpose(weights1, name="weights4") # tied weights
# Biases are never tied — each layer gets its own.
biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_hidden3), name="biases3")
biases4 = tf.Variable(tf.zeros(n_outputs), name="biases4")

hidden1 = activation(tf.matmul(X, weights1) + biases1)
hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)
outputs = tf.matmul(hidden3, weights4) + biases4

reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
# Regularize only the two free weight matrices (weights3/4 are views of them).
reg_loss = regularizer(weights1) + regularizer(weights2)
loss = reconstruction_loss + reg_loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./models/ae/stack_ae_tying_weights.ckpt")
Extracting ./dataset/mnist/train-images-idx3-ubyte.gz
Extracting ./dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ./dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ./dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train MSE: 0.028156107
1 Train MSE: 0.0266964
2 Train MSE: 0.027346307
3 Train MSE: 0.026245965
4 Train MSE: 0.025696924
5 Train MSE: 0.027034879
6 Train MSE: 0.026534757
7 Train MSE: 0.02712253
8 Train MSE: 0.031350538
9 Train MSE: 0.030764775
# Reload the tied-weights checkpoint and compare inputs with reconstructions.
show_reconstructed_digits(X, outputs, "./models/ae/stack_ae_tying_weights.ckpt")
plt.show()
INFO:tensorflow:Restoring parameters from ./models/ae/stack_ae_tying_weights.ckpt
- 之前的一次訓練一個層數很多的autoencoder的速度十分慢,一次只訓練一個淺層的autoencoder會快很多,然後將這些訓練好的autoencoder整合為一個autoencoder,相當於一層一層構建autoencoder,首先利用autoencoder重構得到第一個隱含層的輸出,然後將這個輸出作為第二個autoencoder的輸入,繼續訓練,以此類推。
- 最簡單的封裝這種多相訓練演算法的方法就是在不同的graph中訓練不同的autoencoder,之後得到所有的weights與bias,利用這些變數就可以構建得到最終的autoencoder
reset_graph()

def train_autoencoder(X_train, n_neurons, n_epochs, batch_size,
                      learning_rate = 0.01, l2_reg = 0.0005,
                      activation=tf.nn.elu, seed=42):
    """Train a one-hidden-layer autoencoder inside its own private graph.

    Returns (hidden activations for X_train, hidden kernel, hidden bias,
    output kernel, output bias) so that several shallow autoencoders can
    later be stitched together into one deep network.
    """
    # A private graph keeps this training run isolated from other autoencoders.
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(seed)
        n_inputs = X_train.shape[1]
        X = tf.placeholder(tf.float32, shape=[None, n_inputs])
        my_dense_layer = partial(
            tf.layers.dense,
            activation=activation,
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
        hidden = my_dense_layer(X, n_neurons, name="hidden")
        outputs = my_dense_layer(hidden, n_inputs, activation=None, name="outputs")
        reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss = tf.add_n([reconstruction_loss] + reg_losses)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        training_op = optimizer.minimize(loss)
        init = tf.global_variables_initializer()
    with tf.Session(graph=graph) as sess:
        init.run()
        for epoch in range(n_epochs):
            n_batches = len(X_train) // batch_size
            for iteration in range(n_batches):
                print("\r{}%".format(100 * iteration // n_batches), end="")
                sys.stdout.flush()
                # Draw a fresh random batch each iteration.
                indices = np.random.permutation(len(X_train))[:batch_size]
                X_batch = X_train[indices]
                sess.run(training_op, feed_dict={X: X_batch})
            loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
            print("\r{}".format(epoch), "Train MSE:", loss_train)
        # Snapshot every trainable variable by name so the caller can rebuild the net.
        params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])
        hidden_val = hidden.eval(feed_dict={X: X_train})
        return hidden_val, params["hidden/kernel:0"], params["hidden/bias:0"], params["outputs/kernel:0"], params["outputs/bias:0"]

# Stage 1 trains on raw pixels; stage 2 trains on the first AE's hidden codes.
hidden_output, W1, b1, W4, b4 = train_autoencoder(mnist.train.images, n_neurons=300, n_epochs=4, batch_size=150)
_, W2, b2, W3, b3 = train_autoencoder(hidden_output, n_neurons=150, n_epochs=4, batch_size=150)
0 Train MSE: 0.018122246
1 Train MSE: 0.018951437
2 Train MSE: 0.019684358
3 Train MSE: 0.019918667
0 Train MSE: 0.004202352
1 Train MSE: 0.0042918506
2 Train MSE: 0.0044221305
3 Train MSE: 0.004516779
# Assemble the final deep autoencoder from the parameters trained above.
reset_graph()
n_inputs = 28*28
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
hidden1 = tf.nn.elu(tf.matmul(X, W1) + b1)
hidden2 = tf.nn.elu(tf.matmul(hidden1, W2) + b2)
hidden3 = tf.nn.elu(tf.matmul(hidden2, W3) + b3)
outputs = tf.matmul(hidden3, W4) + b4
# No checkpoint to restore here — the weights are plain NumPy constants baked
# into the graph, so show_reconstructed_digits is called without a model path.
show_reconstructed_digits(X, outputs)
plt.show()
- 上面是在不同的graph中構建了不同的autoencoder,也可以在同一個graph中構建,即構建2個loss,首先訓練第一個隱含層的引數,然後再訓練第二個隱含層的引數
- 在訓練不同的autoencoder時,一個能加快訓練速度的方法是:固定一開始得到的所有的訓練引數,計算出經過這一層的輸出,然後將其作為輸入,用於訓練下一個autoencoder,相當於修改feed_dict
對特徵進行視覺化
- 因為自編碼器可以學習特徵,因此我們可以對特徵進行視覺化
# Train several autoencoders sequentially in ONE graph: phase 1 trains the
# outer layers (hidden1 + outputs), phase 2 trains the inner layers while
# the outer ones stay frozen via var_list.
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150 # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0001

activation = tf.nn.elu
regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
initializer = tf.contrib.layers.variance_scaling_initializer()

X = tf.placeholder(tf.float32, shape=[None, n_inputs])

weights1_init = initializer([n_inputs, n_hidden1])
weights2_init = initializer([n_hidden1, n_hidden2])
weights3_init = initializer([n_hidden2, n_hidden3])
weights4_init = initializer([n_hidden3, n_outputs])

weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
weights3 = tf.Variable(weights3_init, dtype=tf.float32, name="weights3")
weights4 = tf.Variable(weights4_init, dtype=tf.float32, name="weights4")

biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_hidden3), name="biases3")
biases4 = tf.Variable(tf.zeros(n_outputs), name="biases4")

hidden1 = activation(tf.matmul(X, weights1) + biases1)
hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)
outputs = tf.matmul(hidden3, weights4) + biases4

reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
optimizer = tf.train.AdamOptimizer(learning_rate)

with tf.name_scope("phase1"):
    # Phase 1: train hidden1 and the output layer directly on the input.
    phase1_outputs = tf.matmul(hidden1, weights4) + biases4 # bypass hidden2 and hidden3
    phase1_reconstruction_loss = tf.reduce_mean(tf.square(phase1_outputs - X))
    phase1_reg_loss = regularizer(weights1) + regularizer(weights4)
    phase1_loss = phase1_reconstruction_loss + phase1_reg_loss
    phase1_training_op = optimizer.minimize(phase1_loss)

with tf.name_scope("phase2"):
    # Phase 2: train hidden2/hidden3 to reproduce hidden1's activations.
    phase2_reconstruction_loss = tf.reduce_mean(tf.square(hidden3 - hidden1))
    phase2_reg_loss = regularizer(weights2) + regularizer(weights3)
    phase2_loss = phase2_reconstruction_loss + phase2_reg_loss
    # Restricting var_list keeps the phase-1 layers untouched.
    train_vars = [weights2, biases2, weights3, biases3]
    phase2_training_op = optimizer.minimize(phase2_loss, var_list=train_vars) # freeze hidden1

init = tf.global_variables_initializer()
saver = tf.train.Saver()

training_ops = [phase1_training_op, phase2_training_op]
reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]
n_epochs = [4, 4]
batch_sizes = [150, 150]

with tf.Session() as sess:
    init.run()
    for phase in range(2):
        print("Training phase #{}".format(phase + 1))
        for epoch in range(n_epochs[phase]):
            n_batches = mnist.train.num_examples // batch_sizes[phase]
            for iteration in range(n_batches):
                print("\r{}%".format(100 * iteration // n_batches), end="")
                sys.stdout.flush()
                X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])
                sess.run(training_ops[phase], feed_dict={X: X_batch})
            loss_train = reconstruction_losses[phase].eval(feed_dict={X: X_batch})
            print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./models/ae/stack_ae_one_at_a_time.ckpt")
    # Full-network reconstruction error on the test set.
    loss_test = reconstruction_loss.eval(feed_dict={X: mnist.test.images})
    print("Test MSE:", loss_test)
Training phase #1
0 Train MSE: 0.0075954874
1 Train MSE: 0.0076178126
2 Train MSE: 0.0075386846
3 Train MSE: 0.007713743
Training phase #2
0 Train MSE: 0.32253775
1 Train MSE: 0.008439677
2 Train MSE: 0.0027948823
3 Train MSE: 0.0022397852
Test MSE: 0.009754321
# Visualize the first five learned first-layer features: each row of
# weights1.T is the input-space pattern one hidden neuron responds to.
with tf.Session() as sess:
    saver.restore(sess, "./models/ae/stack_ae_one_at_a_time.ckpt") # not shown in the book
    weights1_val = weights1.eval()

for i in range(5):
    plt.subplot(1, 5, i + 1)
    plot_image(weights1_val.T[i])
plt.show() # not shown
INFO:tensorflow:Restoring parameters from ./models/ae/stack_ae_one_at_a_time.ckpt
使用自編碼器進行非監督的預訓練
- 如果labeled資料很少,那麼訓練的過程中,模型比較容易過擬合(在引數較多的情況下),我們可以找到一個類似的任務,然後拷貝他的低層的訓練引數到該任務中
- 此外,我們也可以用自編碼器對unlabeled資料進行訓練,然後將淺層的隱含層引數拷貝到labeled資料中,相當於預處理,這也可以減少訓練的引數量
- 在之前的自編碼器中,隱含層節點的個數小於輸入層節點的個數,這會使得資料的資訊有部分缺失;如果隱含層節點個數大於輸入層節點個數,那學習到的自編碼器是過完全的自編碼器(Overcomplete Autoencoder)
降噪自編碼器(Denoising Autoencoders)
- 可以使用自編碼器進行降噪,可以通過對輸入加入高斯噪聲或者對輸入使用dropout來引入噪聲
# Denoising autoencoder: corrupt the input with additive Gaussian noise, but
# score reconstructions against the CLEAN input X.
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150 # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
noise_level = 1.0

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_noisy = X + noise_level * tf.random_normal(tf.shape(X))  # corrupted copy fed to the encoder
hidden1 = tf.layers.dense(X_noisy, n_hidden1, activation=tf.nn.relu,
name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, # not shown in the book
name="hidden2") # not shown
hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, # not shown
name="hidden3") # not shown
outputs = tf.layers.dense(hidden3, n_outputs, name="outputs") # not shown
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE vs. the clean input
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./models/ae/stacked_denoising_gaussian.ckpt")
0 Train MSE: 0.04392284
1 Train MSE: 0.04212565
2 Train MSE: 0.04013202
3 Train MSE: 0.042316306
4 Train MSE: 0.04260728
5 Train MSE: 0.039247368
6 Train MSE: 0.040977154
7 Train MSE: 0.040510636
8 Train MSE: 0.039376777
9 Train MSE: 0.03940287
# Denoising autoencoder, dropout variant (the original header comment said
# "add Gaussian noise", but this cell corrupts the input with dropout).
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150 # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
dropout_rate = 0.3

# `training` switches dropout on/off; it defaults to False (inference mode).
training = tf.placeholder_with_default( False, shape=(), name="training" )
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_drop = tf.layers.dropout( X, dropout_rate, training=training )
hidden1 = tf.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu,
name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, # not shown in the book
name="hidden2") # not shown
hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, # not shown
name="hidden3") # not shown
outputs = tf.layers.dense(hidden3, n_outputs, name="outputs") # not shown
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # NOTE(review): training=True is never fed here, so dropout stays
            # disabled during training — likely unintended; confirm.
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train MSE:", loss_train)
    saver.save(sess, "./models/ae/stacked_denoising_dropout.ckpt")
0 Train MSE: 0.031744514
1 Train MSE: 0.02759777
2 Train MSE: 0.027245231
3 Train MSE: 0.026180187
4 Train MSE: 0.027719578
5 Train MSE: 0.027861215
6 Train MSE: 0.024314487
7 Train MSE: 0.026936982
8 Train MSE: 0.024768423
9 Train MSE: 0.027037865
# Reload the dropout-denoising checkpoint and inspect the reconstructions.
show_reconstructed_digits(X, outputs, "./models/ae/stacked_denoising_dropout.ckpt")
plt.show()
INFO:tensorflow:Restoring parameters from ./models/ae/stacked_denoising_dropout.ckpt
稀疏自編碼器(Sparse Autoencoders)
- 使得神經元大部分節點被限制的自編碼器稱為稀疏自編碼器。我們可以在代價函式中加入稀疏懲罰,如果目標的稀疏率為0.1,但是所有神經元的平均啟用率為0.3,就需要對當前的引數進行處理,減少啟用的神經元個數。
- 一般稀疏懲罰可以用KL散度(Kullback-Leibler Divergence)進行度量,給定兩個離散的概率分佈 $P$ 與 $Q$,KL距離的計算方法為 $D_{KL}(P \,\|\, Q) = \sum_{i} P(i) \log \dfrac{P(i)}{Q(i)}$
假設在coding layer中,神經元被啟用的目標概率為 $p$,實際的啟用概率(訓練中計算出來)為 $q$,則KL距離為 $D_{KL}(p \,\|\, q) = p \log \dfrac{p}{q} + (1-p) \log \dfrac{1-p}{1-q}$