from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

def RNN(X, weights, biases):
    """Build the LSTM classifier graph and return the per-class logits.

    Relies on the module-level ``n_inputs``, ``n_steps``, ``n_hidden_units``
    and ``batch_size``. The initial LSTM state is created for exactly
    ``batch_size`` rows, so every batch fed through this graph must have
    that size.

    Args:
        X: input tensor laid out as (batch, n_steps, n_inputs).
        weights: dict with an 'in' matrix (n_inputs x n_hidden_units) and
            an 'out' matrix (n_hidden_units x n_classes).
        biases: dict with matching 'in' and 'out' bias vectors.

    Returns:
        Tensor of logits, one row per example and one column per class.
    """
    print('the shape of X is',X.shape)

    # Input projection: fold the time axis into the batch axis so a single
    # matmul maps every step from n_inputs to n_hidden_units, then unfold.
    flat_steps = tf.reshape(X, [-1, n_inputs])
    projected = tf.matmul(flat_steps, weights['in']) + biases['in']
    X_in = tf.reshape(projected, [-1, n_steps, n_hidden_units])
    print('the shape of X_in is',X_in.shape)

    # Recurrent layer, unrolled over the time axis (batch-major input).
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(
        n_hidden_units,
        forget_bias=1.0,
        state_is_tuple=True,
    )
    start_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    outputs, final_state = tf.nn.dynamic_rnn(
        lstm_cell,
        X_in,
        initial_state=start_state,
        time_major=False,
    )
    print('the shape of outputs is',outputs.shape)

    # Output projection. The original author notes that final_state[1]
    # could be used here instead; this version takes the last time step of
    # the outputs, which requires moving the time axis to the front first.
    time_first = tf.transpose(outputs, perm=[1, 0, 2])
    print('the shape of transpose outputs is',time_first.shape)
    last_step = time_first[-1]

    return tf.matmul(last_step, weights['out']) + biases['out']

# Load MNIST; one_hot=True yields labels as one-hot vectors.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Hyperparameters.
l_r = 0.001              # learning rate handed to the Adam optimizer
training_iters = 100000  # number of training-loop iterations
batch_size = 128         # examples per training step

# Each image is fed to the RNN as n_steps time steps of n_inputs values.
n_inputs = 28
n_steps = 28
n_hidden_units = 128     # width of the input projection and the LSTM cell
n_classes = 10           # one output score per class

# Placeholders for a batch of inputs and its one-hot labels.
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

# Trainable parameters of the input ('in') and output ('out') projections.
# Both dict literals need their closing braces; without them the script
# does not parse.
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes])),
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ])),
}

# Forward pass: logits for every example in the batch.
pred = RNN(x, weights, biases)

print('the output of RNN is',pred.shape)

# Loss: softmax cross-entropy against the one-hot labels, averaged over
# the batch, minimised with Adam.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)
optimizer = tf.train.AdamOptimizer(l_r)
train_op = optimizer.minimize(cost)

# Accuracy: fraction of examples whose highest-scoring class matches the
# label's class.
predicted_class = tf.argmax(pred, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Create the session that executes the graph.
sess = tf.Session()
# Initialise all variables. Without this step the first run of train_op
# fails because the variables hold no values yet.
sess.run(tf.global_variables_initializer())

# Training: one optimisation step per mini-batch.
for i in range(training_iters):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    # Reshape to the (batch, n_steps, n_inputs) layout placeholder x expects.
    batch_x = batch_x.reshape([batch_size, n_steps, n_inputs])
    sess.run(train_op, feed_dict={x: batch_x, y: batch_y})
    if i % 50 == 0:
        # Every 50 steps, report accuracy on the batch just trained on.
        print(sess.run(accuracy, feed_dict={x: batch_x, y: batch_y, }))

# Test-set evaluation, left disabled by the author.
# NOTE(review): RNN() builds its initial LSTM state for exactly batch_size
# rows, so feeding the whole test set in one call would presumably not
# match that shape -- confirm before enabling.
# test_data = mnist.test.images.reshape([-1, n_steps, n_inputs])
# test_label = mnist.test.labels
# print("Testing Accuracy: ", sess.run(accuracy, feed_dict={x: test_data, y: test_label}))


the shape of X is (?, 28, 28)
the shape of X_in is (?, 28, 128)
the shape of outputs is (128, 28, 128)
the shape of transpose outputs is (28, 128, 128)
the output of RNN is (128, 10)

the shape of X : ? = 不確定的batch大小,第一個28 = n_steps,第二個28 = n_inputs

the shape of X_in :? = batch,28= n_steps,128 = n_hidden_units 


the shape of outputs: 這個沒啥好說的,三個數字對應的意義和X_in一致

the shape of transpose outputs is :這裡只是調換了一下第0維度和第1維度。

得到這個結果之後,outputs 沿第0維可以看成28個(128,128)的陣列(每個時間步一個),第1個128 = batch,第2個128 = n_hidden_units;程式碼裡用 outputs[-1] 取出最後一個時間步的那一個


the output of RNN: 128 = batch,10 = n_classes 也就是這128張圖片在10個類別上分別的得分

後面再用softmax_cross_entropy_with_logits(logits = pred, labels = y)balabala 計算cost (y.shape = (128,10),one-hot)

接著用Adam優化 、計算並輸出正確率....




之前看了很多部落格也看了一些知乎回答,看完了還是很糊塗,大家主要都還是解釋LSTM的那幾個門,但是這個輸入格式搞不懂 我後面的知識就有點建造空中樓閣的感覺。。可是這麼一寫完感覺也好簡單呀....搞明白這麼點東西居然花了我一下午時間= = 不過發現了一個快捷的方法,就是在程式碼裡把覺得不清楚的量的shape輸出來看一下,如果重合的數字很多,可以先設定成不一樣的,這樣比較容易對應。比如這道題裡面就可以把batch = 10, n_hidden_units = 5,n_steps = 20,n_inputs = 28,這樣就一目瞭然了。(雖然會報錯 但是對理清思路還是很有幫助的,反正tf是先畫圖再喂資料嘛)另外就是,一定要動筆算!!!