Tensorflow框架(三)
一、MNIST數字識別
首先載入MNIST手寫數字識別訓練集
# Load the MNIST handwritten-digit dataset; labels are requested as one-hot vectors.
mnist = input_data.read_data_sets("C:/Users/14981/Desktop/Deep Learning/", one_hot=True)

# Number of examples in the training, validation and test splits.
print("Training data size:", mnist.train.num_examples)
print("Validating data size:", mnist.validation.num_examples)
print("Test data size:", mnist.test.num_examples)

# Show the first training example and its label.
print("Example training data:", mnist.train.images[0])
print("Example training data label:", mnist.train.labels[0])
然後定義所需引數
# Hyper-parameters of the network and of the training procedure.
input_node = 784               # MNIST images have 28*28 pixels, hence 784 input nodes
output_node = 10               # number of output-layer nodes
layer1_node = 500              # number of hidden-layer nodes
batch_size = 100               # number of training examples per batch
learning_rate_base = 0.8       # base learning rate
learning_rate_decay = 0.99     # learning-rate decay rate
regularization_rate = 0.0001   # coefficient of the regularization term
training_steps = 30000         # number of training steps
moving_average_decay = 0.99    # decay rate of the moving average
之後我們建立一個函式用來實現神經網路的前向傳播過程,同時加入滑動平均。
函式avg_class.average() 計算括號內變數的滑動平均值,這裡的avg_class是最初我們初始化的滑動平均類
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass of the two-layer network.

    The hidden layer uses ReLU.  No softmax is applied to the output here;
    the returned tensor is meant to be fed to a loss that works on logits.

    Args:
        input_tensor: Input batch tensor.
        avg_class: Moving-average object providing ``average(var)``, or
            ``None`` to use the raw variable values.
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        The output-layer tensor (pre-softmax).
    """
    if avg_class is None:
        # Plain forward pass with the current variable values.
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2

    # Forward pass using the moving average of every parameter, obtained
    # through avg_class.average().
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weights1))
        + avg_class.average(biases1))
    return (tf.matmul(layer1, avg_class.average(weights2))
            + avg_class.average(biases2))
訓練模型過程
在之前簡單的不新增任何優化演算法的基礎上,按照先後順序分別使用了:
- 滑動平均模型
- L2正則化
- 學習率衰減
同時,在定義反向傳播的優化演算法之後,由於之前使用了滑動平均模型,需要使用tf.control_dependencies或tf.group其中一種方式,把引數更新與滑動平均更新合併成同一個訓練操作,這樣在每一步訓練中不僅更新了引數,也更新了引數的影子變數。
def train(mnist):
    """Build the two-layer MNIST network, train it and report its accuracy.

    The graph combines a moving average of the trainable variables, L2
    regularization of the weights and an exponentially decaying learning
    rate.  Validation accuracy is printed every 1000 steps and test accuracy
    once after the training loop; both use the averaged parameters.

    Args:
        mnist: Dataset object exposing ``train``, ``validation`` and ``test``
            splits; the code reads ``images``, ``labels``, ``num_examples``
            and calls ``train.next_batch``.
    """
    # Placeholders for the input images and their one-hot labels.
    x = tf.placeholder(tf.float32, [None, input_node], name='x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name='y-input')

    # Network parameters.
    weights1 = tf.Variable(tf.truncated_normal([input_node, layer1_node], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[layer1_node]))
    weights2 = tf.Variable(tf.truncated_normal([layer1_node, output_node], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[output_node]))

    # Forward pass with the raw parameters (used for the loss).
    y_hat = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter; passed to the moving average (as its num_updates) and to
    # the learning-rate schedule.  Not trainable, so it is not averaged.
    global_step = tf.Variable(0, trainable=False)

    # Moving average over every trainable variable
    # (the GraphKeys.TRAINABLE_VARIABLES collection).
    variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass with the averaged parameters (used for evaluation).
    average_y_hat = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross-entropy loss; labels are converted from one-hot to class indices.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y_hat, labels=tf.argmax(y, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization on the weights only; biases are not regularized.
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    regularization = regularizer(weights1) + regularizer(weights2)

    # Total loss = cross entropy + regularization.
    loss = cross_entropy_mean + regularization

    # Exponentially decaying learning rate.  decay_steps is the number of
    # iterations needed for one pass over the training data, i.e. the number
    # of examples divided by the batch size.
    learning_rate = tf.train.exponential_decay(
        learning_rate_base,                     # base learning rate
        global_step,                            # current iteration
        mnist.train.num_examples / batch_size,  # iterations per pass over the data
        learning_rate_decay)                    # decay rate

    # Gradient-descent step; also increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # One training op that updates both the parameters and their shadow
    # variables.  Equivalent to:
    #     train_op = tf.group(train_step, variables_averages_op)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Accuracy of the averaged model: compare predicted and true classes,
    # cast the booleans to float32 and average them.
    correction_prediction = tf.equal(tf.argmax(average_y_hat, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correction_prediction, tf.float32))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        # Feed dictionaries for the validation and test sets.
        validate_feed = {x: mnist.validation.images,
                         y: mnist.validation.labels}
        test_feed = {x: mnist.test.images,
                     y: mnist.test.labels}

        for i in range(training_steps):
            # Draw the batch for this step and run one training step.
            xs, ys = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={x: xs, y: ys})

            # Report validation accuracy every 1000 steps.
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step, validation accuracy using average model is %g" % (i, validate_acc))

        # Training is finished: measure accuracy on the test set.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("test accuracy using average model is %g" % (test_acc))
最終程式的呼叫打包:
def main(argv=None):
    """Load the MNIST dataset and run training.

    Args:
        argv: Accepted for the launcher's calling convention; not used here.
    """
    mnist = input_data.read_data_sets("C:/Users/14981/Desktop/Deep Learning/", one_hot=True)
    train(mnist)
if __name__ == '__main__':
    # Hand control to TensorFlow's script launcher, which by default calls
    # this module's main().
    tf.app.run()
二、變數管理
變數可以通過建立時賦予的名字來獲取和使用,這在網路結構複雜的情況下特別有用。
通過使用tf.get_variable建立或獲取變數:
# Two ways of creating a variable initialised to the constant 1.0.
# 1) tf.get_variable: the name "v" plus an initializer object.
ones_initializer = tf.constant_initializer(1.0)
v = tf.get_variable("v", shape=[1], initializer=ones_initializer)
# 2) tf.Variable: an initial-value tensor, with the name passed as a keyword.
#    Note that this rebinds the Python name `v`.
initial_value = tf.constant(1.0, shape=[1])
v = tf.Variable(initial_value, name="v")
這裡tf.get_variable的變數名稱是必填引數,如果有重名變數,程式會報錯,建立失敗:
# This snippet is expected to fail: the second call asks tf.get_variable to
# create a variable under the name "v", which the first call already created.
v = tf.get_variable(
    "v",
    shape=[1],
    initializer=tf.constant_initializer(1.0),
)
w = tf.get_variable(
    "v",
    shape=[1, 2],
    initializer=tf.constant_initializer(2.0),
)
那麼現在問題是我們需要獲取已經建立變數,這就需要通過tf.variable_scope函式生成上下文管理器。
下述程式碼表示了這個過程:如果tf.variable_scope的reuse = False,tf.get_variable將建立新的變數;如果reuse = True,該函式將會直接獲取已經建立的變數。
# Create a variable named "v" inside the "foo" variable scope.
with tf.variable_scope("foo"):
    v = tf.get_variable(
        "v", shape=[1], initializer=tf.constant_initializer(1.0))

# The scope "foo" already contains a variable named "v", so this raises.
with tf.variable_scope("foo"):
    v = tf.get_variable("v", [1])

# With reuse=True, tf.get_variable fetches the already-declared variable.
with tf.variable_scope("foo", reuse=True):
    v1 = tf.get_variable("v", [1])
    print(v == v1)

# This raises as well: no variable "v" has been created in the scope "bar".
with tf.variable_scope("bar", reuse=True):
    v = tf.get_variable("v", [1])
同時tf.variable_scope可以巢狀的:
with tf.variable_scope("root"):
    # reuse setting of the "root" scope
    print(tf.get_variable_scope().reuse)
    with tf.variable_scope("foo", reuse=True):
        # reuse setting of the "foo" scope
        print(tf.get_variable_scope().reuse)
        with tf.variable_scope("bar"):
            # "bar" does not specify reuse, so it takes the value of the
            # enclosing scope
            print(tf.get_variable_scope().reuse)
    # Back in "root" after leaving "foo": print its reuse setting again.
    print(tf.get_variable_scope().reuse)
通過tf.variable_scope建立名稱空間,可以用來管理變數名稱:
v1 = tf.get_variable("v", [1])
# Prints v:0 -- "v" is the variable name and "0" marks the first output of
# the operation that produces it.
print(v1.name)

with tf.variable_scope("foo"):
    v2 = tf.get_variable("v", [1])
    # Prints foo/v:0 -- the variable "v" inside the scope "foo".
    print(v2.name)

with tf.variable_scope("foo"):
    with tf.variable_scope("bar"):
        v3 = tf.get_variable("v", [1])
        print(v3.name)  # prints foo/bar/v:0
    v4 = tf.get_variable("v1", [1])
    print(v4.name)  # prints foo/v1:0

# From an empty-named scope with reuse=True, variables created in other
# scopes are fetched by their full scoped name.
with tf.variable_scope("", reuse=True):
    v5 = tf.get_variable("foo/bar/v", [1])
    print(v5 == v3)
    v6 = tf.get_variable("foo/v1", [1])
    print(v6 == v4)
使用tf.reset_default_graph():重置預設圖
三、模型持久化
Tensorflow通過下述程式碼儲存計算圖
# A tiny graph: two variables and their sum.
v1 = tf.Variable(tf.constant(1.0, shape=[1]), name="v1")
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name="v2")
result = v1 + v2

init = tf.global_variables_initializer()
# tf.train.Saver is the object that writes the model to disk.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    # Save the session's model under the prefix ./model.ckpt
    saver.save(sess, "./model.ckpt")
此時檔案目錄下會出現四個檔案:
model.ckpt.meta,model.ckpt.index,model.ckpt.data-00000-of-00001(此檔名不一定),checkpoint
.meta儲存了網路結構,.index和.data儲存了訓練好的引數,checkpoint記錄最新的模型。
通過下述程式碼恢復模型:
with tf.Session() as sess:
    # Load the persisted graph structure.
    saver = tf.train.import_meta_graph("./model.ckpt.meta")
    # Look up the latest checkpoint in the current directory and restore it.
    saver.restore(sess, tf.train.latest_checkpoint("./"))
當然也可以直接通過下面程式碼恢復模型
with tf.Session() as sess:
    # Restore from the checkpoint prefix using an existing `saver` object.
    saver.restore(sess, "./model.ckpt")
之前說到滑動平均模型,由於每個變數都對應存在一個影子變數,所以在儲存模型的時候也要考慮。下面是儲存滑動平均模型樣例:
v = tf.Variable(0, dtype=tf.float32, name="v")

# No moving average has been declared yet, so v is the only variable.
# Prints: v:0
for variables in tf.global_variables():
    print(variables.name)

ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.global_variables())

# Declaring the moving average creates a shadow variable for v.
# Prints: v:0 and v/ExponentialMovingAverage:0
for variables in tf.global_variables():
    print(variables.name)

saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.assign(v, 10))
    sess.run(maintain_averages_op)
    # Save both v and its shadow variable.
    saver.save(sess, "./model.ckpt")
    print(sess.run([v, ema.average(v)]))
之後讀取模型引數,這裡我們直接將儲存的影子變數的值賦給變數v:
v = tf.Variable(0, dtype=tf.float32, name="v")

# Map the saved shadow variable's name onto v, so that restoring loads the
# saved moving-average value into v.
saver = tf.train.Saver({"v/ExponentialMovingAverage": v})

with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v))
也可以通過ema.variables_to_restore()生成變數與其對應影子變數的字典:
v = tf.Variable(0, dtype=tf.float32, name="v")
ema = tf.train.ExponentialMovingAverage(0.99)

# ema.variables_to_restore() builds the shadow-name -> variable dictionary
# that would otherwise have to be written by hand.
print(ema.variables_to_restore())
saver = tf.train.Saver(ema.variables_to_restore())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, "./model.ckpt")
讀取模型引數
with tf.Session() as sess:
    # Restore through the existing `saver`, then print the value of v.
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v))
上述所使用的模型持久化,由於記錄了程式執行所需要的全部資訊,對於變數初始化資訊,模型儲存的輔助資訊都有所記錄,而有時實際使用的時候,只需要通過神經網路前向傳播到輸出層輸出結果。Tensorflow提供了convert_variables_to_constants函式,該函式可以將計算圖中的變數及取值通過常量方式儲存。
import tensorflow as tf
from tensorflow.python.framework import graph_util
# A tiny graph: two variables and their sum.
v1 = tf.Variable(tf.constant(1.0, shape=[1]), name="v1")
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name="v2")
result = v1 + v2

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Export the GraphDef part of the current graph.
    graph_def = tf.get_default_graph().as_graph_def()
    # Turn the variables and their values into constants, keeping only what
    # the node 'add' needs (variable-initialisation ops, for example, are
    # dropped).
    output_graph_def = graph_util.convert_variables_to_constants(sess, graph_def, ['add'])
    # Write the exported model to a file.
    with tf.gfile.GFile("./combined_model.pb", "wb") as f:
        f.write(output_graph_def.SerializeToString())
讀取模型
import tensorflow as tf
from tensorflow.python.platform import gfile
with tf.Session() as sess:
    model_filename = "./combined_model.pb"
    # Read the saved model file and parse it into a GraphDef protocol buffer.
    # GFile is used in place of the deprecated FastGFile alias.
    with gfile.GFile(model_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Load the saved graph into the current graph.  return_elements names the
    # tensors to return: the export step used the node name "add", whereas
    # here a tensor name is required, hence "add:0".
    result = tf.import_graph_def(graph_def, return_elements=["add:0"])
    print(sess.run(result))
四、mnist最佳程式樣例
最初的mnist程式樣例沒有涉及儲存模型資訊。
下面給出模型訓練的過程程式:
import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data
# Network layer sizes.
input_node = 28 * 28   # one input node per pixel of a 28x28 image (784)
output_node = 10       # output-layer nodes
layer1_node = 500      # hidden-layer nodes
def get_weight_variable(shape, regularizer):
    """Get the "weights" variable of the current variable scope.

    The variable is obtained through ``tf.get_variable`` with a truncated
    normal initializer (stddev 0.1).  When a regularizer is supplied, its
    value for these weights is added to the "losses" collection.

    Args:
        shape: Shape of the weight variable.
        regularizer: Callable applied to the weights to produce a
            regularization loss, or ``None`` for no regularization.

    Returns:
        The weight variable.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    if regularizer is not None:
        tf.add_to_collection("losses", regularizer(weights))
    return weights
def inference(input_tensor, regularizer):
    """Compute the forward pass of the two-layer network.

    Each layer's variables live in their own variable scope ('layer1',
    'layer2').  The hidden layer uses ReLU; the output layer has no
    activation.

    Args:
        input_tensor: Input batch tensor.
        regularizer: Passed to ``get_weight_variable`` for both weight
            matrices; may be ``None``.

    Returns:
        The output-layer tensor.
    """
    # Hidden layer.
    with tf.variable_scope('layer1'):
        weights = get_weight_variable(
            [input_node, layer1_node], regularizer)
        biases = tf.get_variable(
            "biases", [layer1_node],
            initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)

    # Output layer.
    with tf.variable_scope('layer2'):
        weights = get_weight_variable(
            [layer1_node, output_node], regularizer)
        biases = tf.get_variable(
            "biases", [output_node],
            initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases

    return layer2
# Training configuration.
batch_size = 100              # examples per training batch
training_steps = 30000        # number of training steps

learning_rate_base = 0.8      # base learning rate
learning_rate_decay = 0.99    # learning-rate decay rate

regularaztion_rate = 1e-4     # L2 regularization coefficient
moving_average_decay = 0.99   # moving-average decay rate

# Where checkpoints are written and the checkpoint file name.
model_name = "model.ckpt"
model_save_path = "./"
def train(mnist):
    """Train the network and periodically save checkpoints.

    Every 1000 steps the current batch loss is printed and the model is
    saved under ``model_save_path``/``model_name`` with the global step
    appended to the file name.

    Args:
        mnist: Dataset object; the code reads ``train.num_examples`` and
            calls ``train.next_batch``.
    """
    x = tf.placeholder(tf.float32, [None, input_node], name='x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(regularaztion_rate)
    y_hat = inference(x, regularizer)

    # Moving average over the trainable variables.
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Cross-entropy loss; labels are converted from one-hot to class indices.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y_hat, labels=tf.argmax(y, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Add the regularization terms collected in 'losses' by inference().
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    # Exponentially decaying learning rate.
    learning_rate = tf.train.exponential_decay(
        learning_rate_base,
        global_step,
        mnist.train.num_examples / batch_size,
        learning_rate_decay)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # One op that runs both the gradient step and the moving-average update.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    init = tf.global_variables_initializer()
    # Saver used to persist the model.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        for i in range(training_steps):
            xs, ys = mnist.train.next_batch(batch_size)
            _, loss_value, _ = sess.run([train_op, loss, global_step],
                                        feed_dict={x: xs, y: ys})
            if i % 1000 == 0:
                print("After %d training step, loss on training batch is %g" % (i, loss_value))
                # Checkpoint alongside each progress report; global_step is
                # appended to the file name.
                saver.save(sess, os.path.join(model_save_path, model_name), global_step=global_step)
# Read the MNIST data from the current directory (one-hot labels requested)
# and start training.
mnist = input_data.read_data_sets(
    "./",
    one_hot=True,
)
train(mnist)
上述程式碼表示了整個訓練過程。下面提供計算準確率的程式碼,該程式每10秒載入一次最新儲存的模型,並在驗證集上計算正確率。
def evaluate(mnist, eval_interval_secs=10):
    """Repeatedly evaluate the latest checkpoint on the validation set.

    A fresh graph is built, the moving-average (shadow) values are loaded
    into the network variables, and validation accuracy is printed.  The
    check repeats every ``eval_interval_secs`` seconds.  The function returns
    once no checkpoint can be found; while checkpoints exist it loops forever.

    Args:
        mnist: Dataset object; the code reads ``validation.images`` and
            ``validation.labels``.
        eval_interval_secs: Seconds to sleep between two evaluations.
    """
    # Local import so this block does not rely on a module-level import.
    import time

    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, input_node], name='x-input')
        y = tf.placeholder(tf.float32, [None, output_node], name='y-input')
        validate_feed = {x: mnist.validation.images, y: mnist.validation.labels}

        # Forward pass; no regularizer is passed here.
        y_hat = inference(x, None)

        # Accuracy: fraction of examples whose predicted class matches the label.
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_hat, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Build the shadow-name -> variable dictionary so that restoring loads
        # the moving-average values into the network variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        while True:
            with tf.Session() as sess:
                # get_checkpoint_state uses the 'checkpoint' file to find the
                # newest model file in the directory.
                ckpt = tf.train.get_checkpoint_state(model_save_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # The training step is the suffix of the checkpoint file name.
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                    print("After %s training step, validation accuracy is %g" % (global_step, accuracy_score))
                else:
                    print("No checkpoint file found")
                    return
            time.sleep(eval_interval_secs)
# Read the MNIST data from the current directory (one-hot labels requested)
# and start the evaluation loop.
mnist = input_data.read_data_sets(
    "./",
    one_hot=True,
)
evaluate(mnist)