TensorFlow神經網路(四)手寫數字識別
阿新 • • 發佈:2018-11-14
內容來自mooc人工智慧實踐第五講
一、MNIST資料集一些用到的基礎函式語法
############ warm up ! ############
# Syntax illustrations for the TensorFlow 1.x calls used later in this article.
# Names such as BATCH_SIZE, x, dtype and axis are placeholders, so the
# snippets are meant to be read one by one rather than run top to bottom.
import tensorflow as tf
# Import the input_data module
from tensorflow.examples.tutorials.mnist import input_data

# Load the dataset; labels are stored in one-hot form
mnist = input_data.read_data_sets('./data/', one_hot=True)

# Print the number of samples in the training, validation and test sets
print("train data size:", mnist.train.num_examples)
print("validation data size:", mnist.validation.num_examples)
print("test data size:", mnist.test.num_examples)

# Look at the label / image data of one training sample by index
mnist.train.labels[0]
mnist.train.images[0]

# Fetch BATCH_SIZE images and labels from the training set
xs, ys = mnist.train.next_batch(BATCH_SIZE)
# Print their shapes
print("xs shape: ", xs.shape)
print("ys shape: ", ys.shape)

# Collect everything stored in a named collection into a list
tf.get_collection("")

# Add up the corresponding elements of the tensors in a list
tf.add_n([])

# Convert x to the given type
tf.cast(x, dtype)

# Compare two matrices / vectors element by element:
# True where the elements are equal, False otherwise
A = [[1, 3, 4, 5, 6]]
B = [[1, 3, 4, 3, 2]]
with tf.Session() as sess:
    print(sess.run(tf.equal(A, B)))

# Mean value
# - second argument omitted: mean over all elements
# - second argument 0: mean along the first dimension
# - second argument 1: mean along the second dimension
tf.reduce_mean(x, axis)

# Index of the largest value of x along the dimension given by axis
tf.argmax(x, axis)

# Join path components
import os
os.path.join("home", "name")  # returns "home/name" on POSIX

# Split a string on a separator and get the list of pieces;
# used to read the global step back out of a checkpoint file name
'./mode/mnist_model-1001'.split('/')[-1].split('-')[-1]  # returns the string '1001'

# Used to rebuild an already defined network in a fresh graph
with tf.Graph().as_default() as g:
    pass  # nodes defined inside this block are placed in graph g
###### Saving the model ######
# During back-propagation the model is usually saved every fixed number of steps.
# Three files are written:
#   1. a .meta file with the current graph structure
#   2. a .index file with the current parameter names
#   3. a .data file with the current parameters
saver = tf.train.Saver()  # instantiate the saver object
# Path prefix ./MODEL_SAVE_PATH/MODEL_NAME; "-global_step" is appended on save
checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)
with tf.Session() as sess:
    for i in range(STEPS):
        # Only save once every 輪數 ("number of steps") iterations
        if i % 輪數 != 0:
            continue
        # Writes all parameters of the network to the given path and tags the
        # saved files with the training step at which they were written
        saver.save(sess, checkpoint_prefix, global_step=global_step)

# To evaluate the network, the trained model has to be loaded again
with tf.Session() as sess:
    # 儲存路徑 is a placeholder for the checkpoint directory
    ckpt = tf.train.get_checkpoint_state(儲存路徑)
    checkpoint_found = ckpt and ckpt.model_checkpoint_path
    if checkpoint_found:
        # Load the saved model into the current session
        saver.restore(sess, ckpt.model_checkpoint_path)

# Loading the moving averages of the parameters
# If the model used moving averages when it was saved, the averaged values
# are stored in the corresponding checkpoint files
# 滑動平均基數 is a placeholder for the moving-average decay
ema = tf.train.ExponentialMovingAverage(滑動平均基數)
ema_restore = ema.variables_to_restore()
# A saver that restores the moving-average values
saver = tf.train.Saver(ema_restore)

# Evaluating the accuracy of the network
# y is the prediction of the network on one batch of data,
# y.shape = [batch_size, 10]
predicted_class = tf.argmax(y, 1)
labelled_class = tf.argmax(y_, 1)
# Element-wise check whether prediction and actual label agree
correct_prediction = tf.equal(predicted_class, labelled_class)
# Cast the booleans to floats and average them
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
二、測試過程test.py及主函式
######## test.py ##########
def test(mnist):
    """Template of the evaluation routine: restore the model and print accuracy.

    The non-ASCII names and the bare ``dtype`` / ``shape`` arguments are
    placeholders that have to be filled in.

    Args:
        mnist: Dataset object as returned by input_data.read_data_sets.
    """
    with tf.Graph().as_default() as g:
        # Placeholders for the inputs and their labels
        x = tf.placeholder(dtype, shape)
        y_ = tf.placeholder(dtype, shape)
        # Forward pass, gives the prediction y (no regularizer at test time)
        y = mnist_forward.forward(x, None)

        # A saver that restores the moving averages;
        # 滑動衰減率 is a placeholder for the moving-average decay
        ema = tf.train.ExponentialMovingAverage(滑動衰減率)
        ema_restore = ema.variables_to_restore()
        saver = tf.train.Saver(ema_restore)

        # Accuracy: fraction of samples whose predicted class equals the label
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # The loop stays inside the graph context so every session uses g
        while True:
            with tf.Session() as sess:
                # Look up the latest checkpoint;
                # 儲存路徑 is a placeholder for the checkpoint directory
                ckpt = tf.train.get_checkpoint_state(儲存路徑)
                # A checkpoint and its saved model exist at that path
                if ckpt and ckpt.model_checkpoint_path:
                    # Restore the session
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Recover the step count from the checkpoint file name
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # Compute the accuracy; 測試資料 / 測試資料標籤 are
                    # placeholders for the test data and the test labels
                    accuracy_score = sess.run(accuracy, feed_dict={x: 測試資料, y_: 測試資料標籤})
                    # Report
                    print("after %s training steps, test_accuracy = %g" % (global_step, accuracy_score))
                # No model available
                else:
                    print("no checkpoint file found")
                    return
######## main function #############
def main():
    """Read the MNIST data from ./data/ with one-hot labels and evaluate on it."""
    # Hand the loaded dataset straight to the test routine defined above
    test(input_data.read_data_sets('./data/', one_hot=True))


if __name__ == '__main__':
    main()
三、完整程式碼
- ①
mnist_forward.py
# mnist_forward.py
# coding: utf-8
import tensorflow as tf
# Number of input features per sample (rows of the first weight matrix)
INPUT_NODE = 784
# Number of network outputs (columns of the second weight matrix)
OUTPUT_NODE = 10
# Number of units in the hidden layer built by forward()
LAYER1_NODE = 500
def get_weight(shape, regularizer):
    """Create a weight variable and register its L2 regularization loss.

    Args:
        shape: Shape of the weight tensor to create.
        regularizer: L2 regularization coefficient, or None to skip
            regularization.

    Returns:
        The newly created tf.Variable.
    """
    # Initial values come from a truncated normal distribution, stddev 0.1
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    if regularizer is not None:
        # Stored under 'losses' so the caller can add it to the total loss
        tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w
def get_bias(shape):
    """Return a new bias variable of the given shape, initialised to zeros."""
    return tf.Variable(tf.zeros(shape))
def forward(x, regularizer):
    """Build the two-layer fully connected network and return its raw output.

    Args:
        x: Input tensor; it is matrix-multiplied with an
            [INPUT_NODE, LAYER1_NODE] weight matrix.
        regularizer: Passed on to get_weight() for both weight matrices.

    Returns:
        The output-layer tensor, with no activation applied.
    """
    # Hidden layer: affine transform followed by ReLU
    hidden_weights = get_weight([INPUT_NODE, LAYER1_NODE], regularizer)
    hidden_bias = get_bias([LAYER1_NODE])
    hidden_pre_activation = tf.matmul(x, hidden_weights) + hidden_bias
    hidden = tf.nn.relu(hidden_pre_activation)

    # Output layer: affine transform only, no activation function
    output_weights = get_weight([LAYER1_NODE, OUTPUT_NODE], regularizer)
    output_bias = get_bias([OUTPUT_NODE])
    return tf.matmul(hidden, output_weights) + output_bias
- ②
mnist_backward.py
# mnist_backward.py
# coding: utf-8
import tensorflow as tf
# 匯入input_data模組
from tensorflow.examples.tutorials.mnist import input_data
import mnist_forward
import os
# Hyper-parameters
BATCH_SIZE = 200  # number of samples fetched per training step
LEARNING_RATE_BASE = 0.1  # initial learning rate
LEARNING_RATE_DECAY = 0.99  # learning-rate decay rate
REGULARIZER = 0.0001  # regularization coefficient
STEPS = 50000  # number of training steps
MOVING_AVERAGE_DECAY = 0.99  # decay passed to ExponentialMovingAverage
MODEL_SAVE_PATH = "./model/"  # directory the checkpoints are written to
MODEL_NAME = "mnist_model"  # file name prefix of the checkpoints
def backward(mnist):
    """Build the training graph, run the training loop and save checkpoints.

    Args:
        mnist: Dataset object as returned by input_data.read_data_sets; its
            train split supplies the mini-batches.
    """
    # Placeholders for the inputs and their labels
    x = tf.placeholder(tf.float32, shape=(None, mnist_forward.INPUT_NODE))
    y_ = tf.placeholder(tf.float32, shape=(None, mnist_forward.OUTPUT_NODE))
    # Forward pass gives the prediction y
    y = mnist_forward.forward(x, REGULARIZER)
    # Step counter, marked as not trainable
    global_step = tf.Variable(0, trainable=False)

    # Loss = mean cross-entropy + the regularization terms in 'losses'.
    # The sparse cross-entropy op is fed class indices, hence the argmax.
    label_ids = tf.argmax(y_, 1)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=label_ids)
    mean_cross_entropy = tf.reduce_mean(cross_entropy)
    regularization = tf.add_n(tf.get_collection('losses'))
    loss = mean_cross_entropy + regularization

    # Exponentially decaying learning rate; the decay period is the number
    # of batches in the training set
    batches_per_epoch = mnist.train.num_examples / BATCH_SIZE
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        batches_per_epoch,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Back-propagation step on the regularized loss
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss, global_step=global_step)

    # Moving averages of all trainable variables, updated with every step
    averager = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = averager.apply(tf.trainable_variables())
    # A single op that triggers both the gradient step and the averaging
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()

    # Training loop
    with tf.Session() as sess:
        # Initialise all variables
        sess.run(tf.global_variables_initializer())
        # Path prefix ./MODEL_SAVE_PATH/MODEL_NAME; the saver appends the step
        checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)

        for i in range(STEPS):
            # Next mini-batch of images and labels
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            # Feed the batch and run one training step
            _, loss_value, step = sess.run(
                [train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            # Report and save only on every 1000th iteration
            if i % 1000 != 0:
                continue
            print("after %d steps, loss on traing batch is %g" % (step, loss_value))
            saver.save(sess, checkpoint_prefix, global_step=global_step)
def main():
    """Read the MNIST data from ./data/ with one-hot labels and train on it."""
    # Hand the loaded dataset straight to the training routine
    backward(input_data.read_data_sets('./data/', one_hot=True))


# Run main() only when this file is executed as the main script
if __name__ == '__main__':
    main()
- ③
mnist_test.py
# coding:utf-8
# mnist_test.py
# 延時
import time
import tensorflow as tf
# 匯入input_data模組
from tensorflow.examples.tutorials.mnist import input_data
import mnist_forward
import mnist_backward
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # hide warnings
# Seconds to sleep between two evaluation rounds in test()
TEST_INTERVAL_SECS = 5
def test(mnist):
    """Repeatedly restore the latest checkpoint and print the test accuracy.

    Runs until no checkpoint is found under mnist_backward.MODEL_SAVE_PATH,
    sleeping TEST_INTERVAL_SECS seconds between rounds.

    Args:
        mnist: Dataset object as returned by input_data.read_data_sets; its
            test split is used for the evaluation.
    """
    # Rebuild the network in a fresh graph; nodes defined below belong to g
    with tf.Graph().as_default() as g:
        # Placeholders for the inputs and their labels
        x = tf.placeholder(tf.float32, shape=(None, mnist_forward.INPUT_NODE))
        y_ = tf.placeholder(tf.float32, shape=(None, mnist_forward.OUTPUT_NODE))
        # Forward pass without regularization
        y = mnist_forward.forward(x, None)

        # Saver that loads each parameter from its moving-average value
        averager = tf.train.ExponentialMovingAverage(mnist_backward.MOVING_AVERAGE_DECAY)
        ema_restore = averager.variables_to_restore()
        saver = tf.train.Saver(ema_restore)

        # Accuracy: fraction of samples whose predicted class equals the label
        predicted_class = tf.argmax(y, 1)
        labelled_class = tf.argmax(y_, 1)
        correct_prediction = tf.equal(predicted_class, labelled_class)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        while True:
            with tf.Session() as sess:
                # Look up the most recent checkpoint
                ckpt = tf.train.get_checkpoint_state(mnist_backward.MODEL_SAVE_PATH)
                # Stop as soon as there is no model to evaluate
                if not (ckpt and ckpt.model_checkpoint_path):
                    print("no checkpoint file found")
                    return
                # Load the trained model, i.e. the moving-average values
                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step count is the text after the last '-' in the file name
                step_label = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                # Evaluate on the whole test set
                accuracy_score = sess.run(
                    accuracy,
                    feed_dict={x: mnist.test.images, y_: mnist.test.labels})
                print("after %s training steps, test accuracy = %g" % (step_label, accuracy_score))
            time.sleep(TEST_INTERVAL_SECS)
def main():
    """Read the MNIST data from ./data/ with one-hot labels and evaluate on it."""
    # Hand the loaded dataset straight to the evaluation routine
    test(input_data.read_data_sets('./data/', one_hot=True))


if __name__ == '__main__':
    main()
從終端執行結果可以看出,隨著訓練輪數的增加,網路模型的損失函式值不斷降低,並且在測試集上的準確率在不斷提升,有較好的泛化能力。
從上圖結果可以看出,最終迭代後準確率基本穩定不變了。