1. 程式人生 > >學習筆記TF057:TensorFlow MNIST,卷積神經網路、迴圈神經網路、無監督學習

學習筆記TF057:TensorFlow MNIST,卷積神經網路、迴圈神經網路、無監督學習

構建模型。

定義輸入資料,預處理資料。讀取資料MNIST,得到訓練集圖片、標記矩陣,測試集圖片標記矩陣。trX、trY、teX、teY 資料矩陣表現。trX、teX形狀變為[-1,28,28,1],-1 不考慮輸入圖片數量,28x28 圖片長、寬畫素數,1 通道(channel)數量。MNIST 黑白圖片,通道1。RGB彩色影象,通道3。
初始化權重,定義網路結構。卷積神經網路,3個卷積層、3個池化層、1個全連線層、1個輸出層。
定義dropout佔位符p_keep_conv、p_keep_hidden,分別為卷積層、全連線層的神經元保留比例。生成網路模型,得到預測值。
定義損失函式,tf.nn.softmax_cross_entropy_with_logits 比較預測值、真實值差異,做均值處理。
定義訓練操作(train_op),RMSProp演算法優化器tf.train.RMSPropOptimizer,學習率0.001,衰減值0.9,優化損失。
定義預測操作(predict_op)。
會話啟動圖,訓練、評估。

#!/usr/bin/env python
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
batch_size = 128 # number of training samples per mini-batch
test_size = 256 # number of test samples drawn for each evaluation
# Weight-initialisation helper
def init_weights(shape):
    """Create a variable of the given shape with small random initial values.

    Args:
        shape: shape of the variable to create.

    Returns:
        A `tf.Variable` whose initial value is sampled with
        `tf.random_normal` using a standard deviation of 0.01.
    """
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)
# Network definition
def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
    """Build the convolutional classifier and return its raw output scores.

    Args:
        X: batch of input images (this script feeds [None, 28, 28, 1]).
        w, w2, w3: kernels of the first, second and third convolution.
        w4: weights of the fully connected layer; its first dimension is the
            width the last feature map is flattened to.
        w_o: weights of the output layer.
        p_keep_conv: dropout keep probability applied after every
            convolution/pooling stage.
        p_keep_hidden: dropout keep probability applied after the fully
            connected layer.

    Returns:
        The output-layer activations; no softmax is applied here.
    """
    unit_stride = [1, 1, 1, 1]
    pool_2x2 = [1, 2, 2, 1]

    def conv_relu_pool(inputs, kernel):
        # Stride-1 'SAME' convolution + ReLU, then 2x2 stride-2 'SAME' max-pool
        activated = tf.nn.relu(
            tf.nn.conv2d(inputs, kernel, strides=unit_stride, padding='SAME'))
        return tf.nn.max_pool(activated, ksize=pool_2x2,
                              strides=pool_2x2, padding='SAME')

    # Shapes below assume the 28x28x1 input and the weights this script defines.
    # Stage 1: (?, 28, 28, 32) after the convolution, (?, 14, 14, 32) pooled
    stage1 = tf.nn.dropout(conv_relu_pool(X, w), p_keep_conv)
    # Stage 2: (?, 14, 14, 64) after the convolution, (?, 7, 7, 64) pooled
    stage2 = tf.nn.dropout(conv_relu_pool(stage1, w2), p_keep_conv)
    # Stage 3: (?, 7, 7, 128) after the convolution, (?, 4, 4, 128) pooled
    stage3 = conv_relu_pool(stage2, w3)

    # Flatten to the input width expected by w4 (2048 here) before dropout
    flat_width = w4.get_shape().as_list()[0]
    flattened = tf.nn.dropout(tf.reshape(stage3, [-1, flat_width]),
                              p_keep_conv)

    # Fully connected layer followed by dropout
    hidden = tf.nn.dropout(tf.nn.relu(tf.matmul(flattened, w4)),
                           p_keep_hidden)

    # Output layer
    return tf.matmul(hidden, w_o)
# Load MNIST with one-hot labels and split it into train/test images and labels
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
# Pre-processing: reshape the images to (N, 28, 28, 1); -1 lets NumPy infer N
trX = trX.reshape(-1, 28, 28, 1)  # 28x28x1 input img
teX = teX.reshape(-1, 28, 28, 1)  # 28x28x1 input img
X = tf.placeholder("float", [None, 28, 28, 1])
Y = tf.placeholder("float", [None, 10])
# All convolution kernels are 3x3
# 3x3 patch, 1 input channel, 32 output channels
w = init_weights([3, 3, 1, 32])       # 3x3x1 conv, 32 outputs
# 3x3 patch, 32 input channels, 64 output channels
w2 = init_weights([3, 3, 32, 64])     # 3x3x32 conv, 64 outputs
# 3x3 patch, 64 input channels, 128 output channels
w3 = init_weights([3, 3, 64, 128])    # 3x3x64 conv, 128 outputs
# Fully connected layer: 128*4*4 inputs (the flattened last feature map), 625 outputs
w4 = init_weights([128 * 4 * 4, 625]) # FC 128 * 4 * 4 inputs, 625 outputs
# Output layer: 625 inputs, 10 outputs (one per label class)
w_o = init_weights([625, 10])         # FC 625 inputs, 10 outputs (labels)
# Dropout keep-probability placeholders, fed at run time
p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden) # model output scores
# Loss: mean softmax cross-entropy between the model output and the labels
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))
# Training op: RMSProp (learning rate 0.001, decay 0.9) minimising the loss
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
# Prediction op: index of the highest score along axis 1
predict_op = tf.argmax(py_x, 1)
# Launch the graph in a session, then alternate training and evaluation
with tf.Session() as sess:
    # Variables must be initialised before any op that reads them is run
    tf.global_variables_initializer().run()
    num_train = len(trX)
    for i in range(100):
        # Training: sweep the training set in full batches; a trailing
        # partial batch (fewer than batch_size samples) is not used
        for start in range(0, num_train - batch_size + 1, batch_size):
            end = start + batch_size
            train_feed = {X: trX[start:end], Y: trY[start:end],
                          p_keep_conv: 0.8, p_keep_hidden: 0.5}
            sess.run(train_op, feed_dict=train_feed)
        # Evaluation: score a random sample of test_size test images
        test_indices = np.arange(len(teX))
        np.random.shuffle(test_indices)
        test_indices = test_indices[0:test_size]
        expected = np.argmax(teY[test_indices], axis=1)
        # Keep probabilities of 1.0 disable dropout while predicting
        eval_feed = {X: teX[test_indices],
                     p_keep_conv: 1.0,
                     p_keep_hidden: 1.0}
        predicted = sess.run(predict_op, feed_dict=eval_feed)
        print(i, np.mean(expected == predicted))

RNN 自然語言處理領域成功應用,機器翻譯、語音識別、影象描述生成(影象特徵生成描述)、語言模型與文字生成(生成模型預測下一單詞概率)。Alex Graves《Supervised Sequence Labelling with Recurrent Neural Networks》 http://www.cs.toronto.edu/~graves/preprint.pdf

構建模型。設定訓練超引數,設定學習率、訓練次數、每輪訓練資料大小。
RNN分類圖片,每張圖片行,畫素序列(sequence)。MNIST圖片大小28x28,28個元素序列 X 28行,每步輸入序列長度28,輸入步數28步。
定義輸入資料、權重。
定義RNN模型。
定義損失函式、優化器(GradientDescentOptimizer)。
定義模型預測結果、準確率計算方法。
會話啟動圖,開始訓練,每200次輸出1次準確率大小。

from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Training Parameters
# (hyper-parameters controlling the optimisation run)
learning_rate = 0.001
training_steps = 10000
batch_size = 128
display_step = 200
# Network Parameters
# (sizes of the network's input, hidden and output layers)
num_input = 28 # MNIST data input (img shape: 28*28): values fed per timestep
timesteps = 28 # timesteps: length of the input sequence
num_hidden = 128 # hidden layer num of features (LSTM units)
num_classes = 10 # MNIST total classes (0-9 digits), i.e. the output size
# tf Graph input
# Placeholders for the input sequences and their one-hot labels
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])
# Define weights
# Output projection applied to the last LSTM output
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}
# RNN model definition
def RNN(x, weights, biases):
    """Run an LSTM over the input sequence and project its final output.

    Args:
        x: input tensor whose axis 1 holds `timesteps` sequence steps.
        weights: dict providing the output projection matrix under 'out'.
        biases: dict providing the output bias under 'out'.

    Returns:
        The linear projection of the last step's LSTM output.
    """
    # Split along axis 1 into a list of `timesteps` tensors, one per step,
    # which is the input format rnn.static_rnn consumes here
    step_inputs = tf.unstack(x, timesteps, 1)
    # Basic LSTM cell with num_hidden units
    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # Unroll the cell over every step; the final state is not needed
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)
    # Linear activation on the output of the last step only
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
# Build the model output and turn it into class probabilities
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)
# Define loss and optimizer
# Loss: mean softmax cross-entropy between the logits and the labels
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
# Optimizer: plain gradient descent with the configured learning rate
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Evaluate model: a sample is correct when the arg-max of the prediction
# matches the arg-max of its one-hot label; accuracy is the mean of those hits
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)
    for step in range(1, training_steps+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 seq of 28 elements
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        feed = {X: batch_x, Y: batch_y}
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict=feed)
        # Report only on the first step and on every display_step-th step
        if step % display_step != 0 and step != 1:
            continue
        # Calculate batch loss and accuracy on the batch just trained on
        loss, acc = sess.run([loss_op, accuracy], feed_dict=feed)
        loss_text = "{:.4f}".format(loss)
        acc_text = "{:.3f}".format(acc)
        print("Step " + str(step) + ", Minibatch Loss= " +
              loss_text + ", Training Accuracy= " +
              acc_text)
    print("Optimization Finished!")
    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    test_feed = {X: test_data, Y: test_label}
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict=test_feed))

MNIST 無監督學習。自編碼器(autoencoder)。

自編碼網路。UFLDL http://ufldl.stanford.edu/wiki/index.php/Autoencoders_and_Sparsity
監督學習資料有標記。
自編碼網路,輸入樣本壓縮到隱藏層,解壓,輸出端重建樣本。最終輸出層神經元數量等於輸入層神經元數量。壓縮,輸入資料(影象、文字、聲音)存在不同程度冗餘資訊,自動編碼網路學習去掉冗餘資訊,有用特徵輸入到隱藏層。找到可以代表源資料的主要成分。啟用函式不使用sigmoid等非線性函式,用線性函式,就是PCA模型。
主成分分析(principal components analysis, PCA),分析、簡化資料集技術。減少資料集維數,保持資料集方差貢獻最大特徵。保留低階主成分,忽略高階主成分。最常用線性降維方法。
壓縮過程,限制隱藏神經元數量,學習有意義特徵。希望神經元大部分時間被抑制。神經元輸出接近1為被啟用,接近0為被抑制。部分神經元處於被抑制狀態,稀疏性限制。
多個隱藏層,輸入資料影象,第一層學習識別邊,第二層學習組合邊,構成輪廓、角,更高層學習組合更有意義特徵。

構建模型。設定超引數,學習率、訓練輪數(epoch)、每次訓練資料多少、每隔多少輪顯示一次訓練結果。
定義輸入資料,無監督學習只需要圖片資料,不需要標記資料。
初始化權重,定義網路結構。2個隱藏層,第一個隱藏層神經元256個,第二個隱藏層神經元128個。包括壓縮、解壓過程。
構建損失函式、優化器。損失函式“最小二乘法”,原始資料集和輸出資料集平方差取均值運算。優化器用RMSPropOptimizer。
訓練資料、評估模型。對測試集應用訓練好的自動編碼網路。比較測試集原始圖片和自動編碼網路重建結果。

from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Training Parameters
# (hyper-parameters controlling the optimisation run)
learning_rate = 0.01 # optimizer learning rate
num_steps = 30000 # number of training steps (one mini-batch each)
batch_size = 256 # number of images per training mini-batch
display_step = 1000 # print the loss every this many steps
examples_to_show = 10 # NOTE(review): not referenced anywhere in the visible script
# Network Parameters
# (layer sizes of the autoencoder)
# Number of units (features) in the first hidden layer
num_hidden_1 = 256 # 1st layer num features
# Number of units (features) in the second hidden layer
num_hidden_2 = 128 # 2nd layer num features (the latent dim)
# Number of input features: 28x28 = 784 pixels
num_input = 784 # MNIST data input (img shape: 28*28)
# tf Graph input (only pictures)
# Only images are fed to the graph; labels are not needed
X = tf.placeholder("float", [None, num_input])
# Randomly initialised weights and biases of every encoder/decoder layer
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1])),
    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2])),
    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1])),
    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input])),
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2])),
    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
    'decoder_b2': tf.Variable(tf.random_normal([num_input])),
}
# Building the encoder
def encoder(x):
    """Compress `x` through the two sigmoid encoder layers.

    Uses the module-level `weights` / `biases` entries 'encoder_h1',
    'encoder_b1', 'encoder_h2' and 'encoder_b2'.

    Returns:
        The activation of the second encoder layer (the latent code).
    """
    # First encoder layer: affine transform followed by a sigmoid
    first_affine = tf.add(tf.matmul(x, weights['encoder_h1']),
                          biases['encoder_b1'])
    first_hidden = tf.nn.sigmoid(first_affine)
    # Second encoder layer: affine transform followed by a sigmoid
    second_affine = tf.add(tf.matmul(first_hidden, weights['encoder_h2']),
                           biases['encoder_b2'])
    return tf.nn.sigmoid(second_affine)
# Building the decoder
def decoder(x):
    """Expand the code `x` through the two sigmoid decoder layers.

    Uses the module-level `weights` / `biases` entries 'decoder_h1',
    'decoder_b1', 'decoder_h2' and 'decoder_b2'.

    Returns:
        The activation of the second decoder layer (the reconstruction).
    """
    # First decoder layer: affine transform followed by a sigmoid
    first_affine = tf.add(tf.matmul(x, weights['decoder_h1']),
                          biases['decoder_b1'])
    first_hidden = tf.nn.sigmoid(first_affine)
    # Second decoder layer: affine transform followed by a sigmoid
    second_affine = tf.add(tf.matmul(first_hidden, weights['decoder_h2']),
                           biases['decoder_b2'])
    return tf.nn.sigmoid(second_affine)
# Construct model
# Chain the encoder and the decoder: input -> code -> reconstruction
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
# Prediction
# The network's prediction is the decoder output (the reconstruction)
y_pred = decoder_op
# Targets (Labels) are the input data.
# The reconstruction target is the input itself
y_true = X
# Define loss and optimizer, minimize the squared error
# Loss: mean of the element-wise squared difference; optimizer: RMSProp
loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Start Training
# Open a new TF session for training and for the visual check afterwards
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)
    # Training
    for i in range(1, num_steps+1):
        # Get the next batch of MNIST data (only images are needed, not labels)
        batch_x, _ = mnist.train.next_batch(batch_size)
        # Run optimization op (backprop) and cost op (to get loss value)
        _, batch_loss = sess.run([optimizer, loss], feed_dict={X: batch_x})
        # Log the loss on the first step and on every display_step-th step
        if i % display_step == 0 or i == 1:
            print('Step %i: Minibatch Loss: %f' % (i, batch_loss))
    # Testing
    # Encode and decode images from test set and visualize their reconstruction.
    n = 4
    side = 28 * n
    canvas_orig = np.empty((side, side))
    canvas_recon = np.empty((side, side))
    for i in range(n):
        # One row of the n x n grid per batch of n MNIST test images
        batch_x, _ = mnist.test.next_batch(n)
        # Encode and decode the digit images
        recon = sess.run(decoder_op, feed_dict={X: batch_x})
        rows = slice(i * 28, (i + 1) * 28)
        for j in range(n):
            cols = slice(j * 28, (j + 1) * 28)
            # Original digit and its reconstruction go to the same grid cell
            canvas_orig[rows, cols] = batch_x[j].reshape([28, 28])
            canvas_recon[rows, cols] = recon[j].reshape([28, 28])
    # Show the grid of originals first, then the grid of reconstructions
    for title, canvas in (("Original Images", canvas_orig),
                          ("Reconstructed Images", canvas_recon)):
        print(title)
        plt.figure(figsize=(n, n))
        plt.imshow(canvas, origin="upper", cmap="gray")
        plt.show()

參考資料:
《TensorFlow技術解析與實戰》

歡迎推薦上海機器學習工作機會,我的微信:qingxingfengzi