TensorFlow Class Notes (4)

Learning Rate

"""
學習率 learning_rate:每次引數更新的幅度
wn+1 = wn - learning_rate▽
相當於每次在梯度反方向減少的幅度,因為梯度是增加最大的方向,我們要找到極小值
我們優化引數的目的就是讓loss損失函式最小,所以每次都減少一點梯度方向的值
"""
# coding: utf-8
import tensorflow as tf
# Define the parameter w to be optimized, initialized to 5
"""
w = tf.Variable(tf.constant(5, dtype=tf.float32))
#定義損失函式
loss = tf.square(w+1)
#定義反向傳播方法
train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)
學習率如果是1,則不收斂,如果為0.0001,則收斂速度很慢
指數衰減學習率
learning_rate = LEARNING_RATE_BASE*LEARNING_RATE_DECAY*(global_step/LEARNING_RATE_STEP)
右邊引數依次為 學習率初始值,學習率衰減率(0,1),運行了幾輪/多少輪更新一次學習率
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
LEARNING_RATE_BASE,
global_step,
LEARNING_RATE_STEP,
LEARNING_RATE_DECAY,
staircase=True)
#生成會話,訓練40輪
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40):
        sess.run(train_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        print("After %d steps: w is %f, loss is %f"%(i,w_val,loss_val))

收斂結果:
After 39 steps: w is -1.000000, loss is 0.000000
"""
# Below: the exponential-decay learning-rate code
LEARNING_RATE_BASE = 0.1 # initial learning rate
LEARNING_RATE_DECAY =  0.99 # learning-rate decay rate
LEARNING_RATE_STEP = 1 # how many batches to feed before updating the learning rate once, usually total samples / BATCH_SIZE
# Counter for how many batches have been run; initialized to 0 and marked not trainable
global_step = tf.Variable(0, trainable=False)
# Define the exponentially decaying learning rate
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,global_step,LEARNING_RATE_STEP,\
                                           LEARNING_RATE_DECAY,staircase=True)
# Define the parameter to be optimized, initialized to 5
w = tf.Variable(tf.constant(5, dtype=tf.float32))
# Define the loss function
loss = tf.square(w+1)
# Define the back-propagation method
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
# Create a session and train for 40 steps
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    STEPS=40
    for i in range(STEPS):
        sess.run(train_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        print("After %d steps,global_step is %f, w is %f,learning_rate is %f loss is %f"%(i+1,sess.run(global_step),w_val,sess.run(learning_rate),loss_val))
"""
訓練結果:
After 40 steps,global_step is 40.000000, w is -0.995731,learning_rate is 0.066897 loss is 0.000018
"""

Moving Average

"""
滑動平均(影子值):記錄了每個引數一段時間內過往值的平均,增加了模型的泛化性
影子=衰減率影子+(1-衰減率)*引數   影子初值=引數初值
ema = tf.train.ExponentialMovingAverage(
衰減率MOVING_AVERAGE_DECAY,
當前輪數global_step)
mea_op=ema.apple([])
ema_op=ema.apply(tf.trainable_variables())
with if.control_dependencies([train_step,ema_op]):
    train_op=tf.no_op(name='train')

ema.average(引數名) 檢視某引數的滑動平均值
"""
# coding: utf-8
import tensorflow as tf
# 1. Define the variables and the moving-average class
# Define a 32-bit float variable initialized to 0.0; training keeps updating the parameter w1, and the moving average maintains a shadow of w1
w1 = tf.Variable(0, dtype=tf.float32)
# Define num_updates (the number of training iterations), initialized to 0, not trainable
global_step = tf.Variable(0, trainable=False)
# Instantiate the moving-average class with a decay rate of 0.99 and the current step global_step
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# The argument to ema.apply() is the list of variables to track; each sess.run(ema_op)
# updates the moving average of every element in that list
# In practice, tf.trainable_variables() is used to collect all trainable parameters into the list automatically
#ema_op = ema.apply([w1])
ema_op = ema.apply(tf.trainable_variables())

# 2. Watch how the values change across iterations
with tf.Session() as sess:
    # Initialize
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # ema.average(w1) returns the moving-average value of w1
    # Print w1 and its moving average
    print(sess.run([w1,ema.average(w1)]))

    # Assign 1 to w1
    sess.run(tf.assign(w1, 1))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    # Update the step and w1 to simulate the state after 100 training steps, where w1 has become 10
    sess.run(tf.assign(global_step, 100))   # assignment op
    sess.run(tf.assign(w1, 10))             # assignment op
    sess.run(ema_op)                        # update the moving average once
    print(sess.run([w1, ema.average(w1)]))

    # Each subsequent sess.run(ema_op) updates w1's moving average once more
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

"""
Output:
[0.0, 0.0]
[1.0, 0.9]
[10.0, 1.6445453]
[10.0, 2.3281732]
[10.0, 2.955868]
[10.0, 3.532206]
[10.0, 4.061389]
[10.0, 4.547275]
[10.0, 4.9934072]
"""