
TensorFlow Framework (3)

1. MNIST Digit Recognition

First, load the MNIST handwritten-digit dataset:

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

mnist = input_data.read_data_sets("C:/Users/14981/Desktop/Deep Learning/", one_hot = True) # load the dataset
print("Training data size:", mnist.train.num_examples)         # number of training examples
print("Validating data size:", mnist.validation.num_examples)  # number of validation examples
print("Test data size:", mnist.test.num_examples)              # number of test examples
print("Example training data:", mnist.train.images[0])
print("Example training data label:", mnist.train.labels[0])

Next, define the required parameters:

input_node = 784             # each MNIST image has 28*28 pixels, so the input layer has 784 nodes
output_node = 10             # number of output-layer nodes (one per digit class)
layer1_node = 500            # number of hidden-layer nodes
batch_size = 100             # number of training examples in one batch
learning_rate_base = 0.8     # base learning rate
learning_rate_decay = 0.99   # learning-rate decay rate
regularization_rate = 0.0001 # regularization coefficient
training_steps = 30000       # number of training steps
moving_average_decay = 0.99  # moving-average decay rate

Next we build a function that implements the forward pass of the network, optionally applying moving averages.

avg_class.average() computes the moving average of the variable passed to it; avg_class here is the moving-average class we initialize later in train().

def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    # No softmax is applied to the output here; see the structure of the loss function
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        # Use avg_class.average to compute the moving averages of the variables
        layer1 = tf.nn.relu(
            tf.matmul(input_tensor, avg_class.average(weights1)) +
            avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)

Training the model

Building on the earlier bare-bones version with no optimizations, we add, in order:

  • the moving-average model
  • L2 regularization
  • learning-rate decay

Also, because the moving-average model is in use, after defining the optimizer we need tf.control_dependencies or tf.group, so that each backpropagation step updates not only the parameters but also their shadow variables (a minimal sketch of the two equivalent forms follows).
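For illustration, a minimal sketch of the two equivalent forms (train_step and variables_averages_op stand for the ops defined in train() below):

# Form 1: a no-op that depends on both update ops
with tf.control_dependencies([train_step, variables_averages_op]):
    train_op = tf.no_op(name = 'train')

# Form 2: group both update ops into a single op
train_op = tf.group(train_step, variables_averages_op)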

def train(mnist):
    # Define input placeholders
    x = tf.placeholder(tf.float32, [None, input_node], name = 'x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name = 'y-input')
    
    # Define the network's weight and bias variables
    weights1 = tf.Variable(tf.truncated_normal([input_node, layer1_node], stddev = 0.1))
    biases1 = tf.Variable(tf.constant(0.1, dtype = tf.float32, shape = [layer1_node]))
    weights2 = tf.Variable(tf.truncated_normal([layer1_node, output_node], stddev = 0.1))
    biases2 = tf.Variable(tf.constant(0.1, dtype = tf.float32, shape = [output_node]))
    
    # Compute the forward-pass result of the network
    y_hat = inference(x, None, weights1, biases1, weights2, biases2)
    
    # This plays the same role as the num_updates variable discussed earlier for the
    # moving-average model: it mimics the iteration count to control the decay rate
    global_step = tf.Variable(0, trainable = False)
    
    # Initialize the moving-average class
    variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    
    # Apply moving averages to all trainable variables, i.e. the elements of the
    # GraphKeys.TRAINABLE_VARIABLES collection; this excludes global_step
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    
    # Compute the forward-pass result using the moving averages
    average_y_hat = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # Define the loss function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits = y_hat, labels = tf.argmax(y, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # L2 regularizer
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    
    # Following the regularization formula, the bias terms are not regularized
    regularization = regularizer(weights1) + regularizer(weights2)
    
    # The total loss is the sum of the cross-entropy loss and the regularization loss
    loss = cross_entropy_mean + regularization
    
    # Learning-rate decay
    learning_rate = tf.train.exponential_decay(
        learning_rate_base,                     # base learning rate to decay from
        global_step,                            # current training step
        mnist.train.num_examples / batch_size,  # steps needed for one pass over the data
        learning_rate_decay)                    # decay rate
    
    # Optimize with gradient descent
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)
    
    # During backpropagation the shadow variables must be updated along with the parameters.
    # The construct below performs both updates at once; it is equivalent to
    # train_op = tf.group(train_step, variables_averages_op)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name = 'train')
    
    # Check whether the predictions match the true labels
    correction_prediction = tf.equal(tf.argmax(average_y_hat, 1), tf.argmax(y, 1))
    
    # Accuracy over a batch of data
    # correction_prediction is cast to tf.float32 before averaging
    accuracy = tf.reduce_mean(tf.cast(correction_prediction, tf.float32))
    
    # Initialize the variables
    init = tf.global_variables_initializer()
    with tf.Session() as sess:    
        sess.run(init)
    
        # Validation-set feed dictionary
        validate_feed = {x: mnist.validation.images,
                         y: mnist.validation.labels}
        # Test-set feed dictionary
        test_feed = {x: mnist.test.images,
                     y: mnist.test.labels}
        
        # Start training
        for i in range(training_steps):
            # Produce the training batch for this step
            xs, ys = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict = {x: xs, y:ys})
            
            # Every 1000 steps, report accuracy on the validation set
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict = validate_feed)
                print("After %d training step, validation accuracy using average model is %g" %(i, validate_acc))
        
        # Training is finished; report the final accuracy on the test set
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("validation accuracy using average model is %g" % (test_acc))

Finally, wrap up the program entry point:

def main(argv = None):
    mnist = input_data.read_data_sets("C:/Users/14981/Desktop/Deep Learning/", one_hot = True)
    train(mnist)


if __name__ == '__main__':
    tf.app.run()
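tf.app.run() parses any command-line flags and then calls the main() function defined above.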

 

2. Variable Management

A variable can be retrieved by the name it was given at creation time, which is useful when the network structure is complex.

Variables are created or fetched with tf.get_variable:

# Create a variable named "v" with tf.get_variable, initialized to a given constant
v = tf.get_variable("v", shape = [1], initializer = tf.constant_initializer(1.0))
# The equivalent tf.Variable form
v = tf.Variable(tf.constant(1.0, shape = [1]), name = "v")

 

The name argument of tf.get_variable is required. If a variable with that name already exists, the program raises an error and creation fails:

# This code raises an error because a variable named "v" is created twice
v = tf.get_variable("v", shape = [1], initializer = tf.constant_initializer(1.0))
w = tf.get_variable("v", shape = [1,2], initializer = tf.constant_initializer(2.0))

To fetch a variable that has already been created, we use the context manager generated by tf.variable_scope.

The code below demonstrates this: when the scope has reuse = False (the default), tf.get_variable creates a new variable; when reuse = True, it fetches the existing one.

# Create a variable named v inside the namespace foo
with tf.variable_scope("foo"):
    v = tf.get_variable(
        "v", shape = [1], initializer = tf.constant_initializer(1.0))

# The namespace foo already contains a variable named v, so this code raises an error
with tf.variable_scope("foo"):
    v = tf.get_variable("v", [1])

# With reuse = True, tf.get_variable fetches the already-declared variable
with tf.variable_scope("foo", reuse = True):
    v1 = tf.get_variable("v", [1])
    print(v == v1) # prints True

# This code raises an error because no variable v exists in the namespace bar
with tf.variable_scope("bar", reuse = True):
    v = tf.get_variable("v", [1])

tf.variable_scope can also be nested:

with tf.variable_scope("root"):
    # 獲取root名稱空間的reuse
    print(tf.get_variable_scope().reuse)
    with tf.variable_scope("foo", reuse = True):
        # 獲取foo名稱空間的reuse
        print(tf.get_variable_scope().reuse)
        with tf.variable_scope("bar"):
            # 由於沒有指定bar名稱空間的reuse,所以與外面一層的reuse一致
            print(tf.get_variable_scope().reuse)
    print(tf.get_variable_scope().reuse)

Namespaces created with tf.variable_scope can also be used to manage variable names:

v1 = tf.get_variable("v", [1])
print(v1.name) # 輸出v:0,v表示了變數名稱,0表示v1生成名稱為v變數的第一個運算結果

with tf.variable_scope("foo"):
    v2 = tf.get_variable("v", [1])
    print(v2.name) # 輸出foo/v:0,與之前相似,只不過foo/v表示了在名稱空間foo下的變數v

with tf.variable_scope("foo"):
    with tf.variable_scope("bar"):
        v3 = tf.get_variable("v", [1])
        print(v3.name) # 輸出foo/bar/v:0
    v4 = tf.get_variable("v1",[1])
    print(v4.name) # 輸出foo/v1:0
with tf.variable_scope("",reuse = True):
    v5 = tf.get_variable("foo/bar/v", [1])
    print(v5 == v3)
    v6 = tf.get_variable("foo/v1", [1])
    print(v6 == v4)

Use tf.reset_default_graph() to reset the default graph.
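For example (a minimal sketch), resetting the graph lets snippets like the ones above be re-run without "variable already exists" errors:

tf.reset_default_graph() # discard everything defined in the default graph so far
v = tf.get_variable("v", shape = [1], initializer = tf.constant_initializer(1.0))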

 

3. Model Persistence

TensorFlow saves a computation graph with the following code:

v1 = tf.Variable(tf.constant(1.0, shape = [1]), name = "v1")
v2 = tf.Variable(tf.constant(2.0, shape = [1]), name = "v2")
result = v1 + v2

init = tf.global_variables_initializer()

# Declare a tf.train.Saver to save the model
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    saver.save(sess, "./model.ckpt")

Four files then appear in the working directory:

model.ckpt.meta, model.ckpt.index, model.ckpt.data-00000-of-00001 (this file name can vary), and checkpoint

.meta stores the network structure, .index and .data store the trained parameters, and checkpoint records the most recent model.
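To inspect what a checkpoint contains, one option (a sketch, assuming the model.ckpt files saved above) is tf.train.NewCheckpointReader:

reader = tf.train.NewCheckpointReader("./model.ckpt")
# Maps each saved variable name to its shape, e.g. {'v1': [1], 'v2': [1]}
print(reader.get_variable_to_shape_map())
print(reader.get_tensor("v1")) # [1.]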

The model can be restored with the following code:

with tf.Session() as sess:
    # Load the persisted graph
    saver = tf.train.import_meta_graph("./model.ckpt.meta")

    # Find the latest checkpoint and restore it
    saver.restore(sess, tf.train.latest_checkpoint("./"))

The model can also be restored directly, assuming the variables and the saver are defined as in the saving code above:

with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")

As mentioned earlier, with the moving-average model every variable has a corresponding shadow variable, which must be taken into account when saving. Below is an example of saving a moving-average model:

v = tf.Variable(0, dtype = tf.float32, name = "v")

# No moving-average model has been declared yet, so there is only the variable v
# prints v:0
for variables in tf.global_variables():
    print(variables.name)
    
ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.global_variables())

# After the moving-average model is declared, a shadow variable is generated for v automatically
# prints v:0 and v/ExponentialMovingAverage:0
for variables in tf.global_variables():
    print(variables.name)
    
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.assign(v, 10))
    sess.run(maintain_averages_op)
    # Save the variable v and its shadow variable
    saver.save(sess, "./model.ckpt")
    print(sess.run([v, ema.average(v)]))
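Because ema was created without a num_updates argument, the decay stays at 0.99, so after the single update the shadow value is 0.99 × 0 + 0.01 × 10 = 0.1; the final print should therefore show approximately [10.0, 0.1].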

When reading the model back, we can load the saved shadow variable's value directly into v:

v = tf.Variable(0, dtype = tf.float32, name = "v")
# Assign the saved shadow variable of v to v itself
saver = tf.train.Saver({"v/ExponentialMovingAverage": v})
with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v)) # prints the shadow value, roughly 0.1

Alternatively, variables_to_restore() generates the dictionary from shadow-variable names to the variables themselves:

v = tf.Variable(0, dtype = tf.float32, name = "v")
ema = tf.train.ExponentialMovingAverage(0.99)

# ema.variables_to_restore() directly generates the dictionary used in the code above,
# e.g. {'v/ExponentialMovingAverage': <tf.Variable 'v:0'>}
print(ema.variables_to_restore())
saver = tf.train.Saver(ema.variables_to_restore())


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, "./model.ckpt")

Reading the model parameters back:

with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v))

 

The persistence mechanism used above records all the information needed to rerun the program, including variable-initialization ops and other bookkeeping for saving the model. In practice, however, we often only need the forward pass up to the output layer. TensorFlow provides the convert_variables_to_constants function, which stores the graph's variables and their values as constants.

import tensorflow as tf
from tensorflow.python.framework import graph_util

v1 = tf.Variable(tf.constant(1.0, shape = [1]), name = "v1")
v2 = tf.Variable(tf.constant(2.0, shape = [1]), name = "v2")
result = v1 + v2
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    # Export the GraphDef portion of the current computation graph
    graph_def = tf.get_default_graph().as_graph_def()

    # Convert the graph's variables and their values into constants, and drop graph
    # nodes that are no longer needed (such as the variable-initialization ops)
    output_graph_def = graph_util.convert_variables_to_constants(sess, graph_def, ['add'])
    
    # Write the exported model to a file
    with tf.gfile.GFile("./combined_model.pb", "wb") as f:
        f.write(output_graph_def.SerializeToString())

Reading the model back:

import tensorflow as tf
from tensorflow.python.platform import gfile
with tf.Session() as sess:
    model_filename = "./combined_model.pb"
    
    # Read the saved model file and parse it into a GraphDef protocol buffer
    with gfile.FastGFile(model_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        
    # Load the graph stored in graph_def into the current graph. return_elements = ["add:0"]
    # names the tensor to return: when saving we referred to the computation node (add),
    # but when loading we refer to the tensor it outputs (add:0)
    result = tf.import_graph_def(graph_def, return_elements = ["add:0"])
    print(sess.run(result))
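    # Since tf.import_graph_def was called without a name argument, the imported
    # nodes live under the default "import/" scope; as a sketch, the same tensor
    # can also be fetched by name from the session's graph:
    add_tensor = sess.graph.get_tensor_by_name("import/add:0")
    print(sess.run(add_tensor)) # [3.]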

 

4. Best-Practice MNIST Program Example

The initial MNIST example did not save any model information.

The training program is given below:

import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data

# Network structure parameters
input_node = 784
output_node = 10
layer1_node = 500

# Create the weight variable and register its regularization loss
def get_weight_variable(shape, regularizer):
    weights = tf.get_variable(
        "weights", shape,
        initializer = tf.truncated_normal_initializer(stddev = 0.1))
    
    if regularizer is not None:
        tf.add_to_collection("losses", regularizer(weights))
        
    return weights

# Forward propagation of the network
def inference(input_tensor, regularizer):
    with tf.variable_scope('layer1'):
        weights = get_weight_variable(
            [input_node, layer1_node], regularizer)
        biases = tf.get_variable(
            "biases", [layer1_node],
            initializer = tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)
    
    with tf.variable_scope('layer2'):
        weights = get_weight_variable(
            [layer1_node, output_node], regularizer)
        biases = tf.get_variable(
            "biases", [output_node],
            initializer = tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases
    return layer2
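Note that calling inference twice in the same default graph would raise an error, because tf.get_variable refuses to re-create layer1/weights; the evaluation program further below avoids this by building its own graph with tf.Graph().as_default().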

# Training hyperparameters
batch_size = 100
learning_rate_base = 0.8
learning_rate_decay = 0.99
regularization_rate = 0.0001
training_steps = 30000
moving_average_decay = 0.99

# Model save path and file name
model_save_path = "./"
model_name = "model.ckpt"

def train(mnist):
    x = tf.placeholder(tf.float32, [None, input_node], name = 'x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name = 'y-input')
    
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    
    y_hat = inference(x, regularizer)
    
    # Moving-average model
    global_step = tf.Variable(0, trainable = False)
    variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    
    # Loss function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits = y_hat, labels = tf.argmax(y, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # Add the regularization losses to the loss function
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    
    # Learning-rate decay
    learning_rate = tf.train.exponential_decay(
        learning_rate_base,
        global_step,
        mnist.train.num_examples / batch_size,
        learning_rate_decay)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)
    
    # Update parameters and their shadow variables together during backpropagation
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name = 'train')
    
    # Initialize the variables
    init = tf.global_variables_initializer()
    
    # Initialize the persistence class
    saver = tf.train.Saver()
    
    # Session
    with tf.Session() as sess:
        sess.run(init)
        
        for i in range(training_steps):
            xs, ys = mnist.train.next_batch(batch_size)
            _, loss_value, _ = sess.run([train_op, loss, global_step],
                                       feed_dict = {x:xs, y:ys})
            
            if i % 1000 == 0:
                print("After %d training step, loss on training batch is %g" %(i, loss_value))
                saver.save(sess, os.path.join(model_save_path, model_name), global_step = global_step)

mnist = input_data.read_data_sets("./", one_hot = True)
train(mnist)

The code above is the complete training process. Below is the evaluation program, which reloads the latest model every 10 seconds and computes its accuracy on the validation set.

import time

# Interval between evaluations, in seconds
eval_interval_secs = 10

def evaluate(mnist):
    with tf.Graph().as_default() as g:
        x = tf.placeholder(tf.float32, [None, input_node], name = 'x-input')
        y = tf.placeholder(tf.float32, [None, output_node], name = 'y-input')
        validate_feed = {x:mnist.validation.images, y:mnist.validation.labels}
        
        # Compute the forward-pass result
        y_hat = inference(x, None)
        
        # Compute accuracy on the validation set
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_hat, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        
        # Define the moving-average class
        variable_averages = tf.train.ExponentialMovingAverage(
            moving_average_decay)
        
        # Directly generate the dictionary mapping shadow variables to variables
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        
        while True:
            with tf.Session() as sess:
                # tf.train.get_checkpoint_state automatically finds the file name of
                # the latest model in the directory via the checkpoint file
                ckpt = tf.train.get_checkpoint_state(
                    model_save_path)
                if ckpt and ckpt.model_checkpoint_path:
                    # Load the model
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Recover the number of training steps at save time from the file name
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    
                    accuracy_score = sess.run(accuracy, feed_dict = validate_feed)
                    
                    print("After %s training step, loss on training batch is %g" % (global_step, accuracy_score))
                else:
                    print("No checkpoint file found")
                    return
            time.sleep(eval_interval_secs)

mnist = input_data.read_data_sets("./", one_hot = True)
evaluate(mnist)