tensorflow-正則化+指數衰減+滑動平均
阿新 • • 發佈:2018-12-15
truncate 設置 put __name__ pri 計算 pytho ida env
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 19 09:42:22 2017
@author: myhaspl
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# --- Network shape -----------------------------------------------------------
# Width of each input row fed to the x_ placeholder
# (presumably a flattened 28x28 MNIST image -- confirm against the dataset).
INPUT_NODE = 784
# Number of units in the single hidden layer.
LAYER1_NODE = 500
# Width of the output layer / of each label row.
OUTPUT_NODE = 10

# --- Optimisation ------------------------------------------------------------
# Number of samples drawn per training step.
BATCH_SIZE = 100
# Total number of training steps. The spelling is kept as-is because train()
# reads the constant under this name.
TRANING_STEPS = 30000
# Initial learning rate and decay rate handed to tf.train.exponential_decay.
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99

# --- Regularisation / averaging ----------------------------------------------
# Scale passed to the L2 regularizer applied to both weight matrices.
REGULARIZATION_RATE = 0.0001
# Decay passed to tf.train.ExponentialMovingAverage.
MOVING_AVERAGE_DECAY = 0.99
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Build the forward pass of the two-layer fully connected network.

    The result is ``relu(input_tensor . weights1 + biases1) . weights2 + biases2``.
    No softmax is applied here; the caller gets the raw output of the
    second layer.

    Args:
        input_tensor: Tensor multiplied by the first-layer weights.
        avg_class: ``None`` to use the variables as they are, or an object
            exposing ``average(variable)`` (train() passes a
            ``tf.train.ExponentialMovingAverage``) whose averaged values are
            used in place of every weight and bias.
        weights1: First-layer weight variable.
        biases1: First-layer bias variable.
        weights2: Second-layer weight variable.
        biases2: Second-layer bias variable.

    Returns:
        The output tensor of the second layer.
    """
    # Identity test: an averaging object must never be compared by value.
    if avg_class is not None:
        # Moving-average mode: swap every parameter for its averaged value.
        weights1 = avg_class.average(weights1)
        biases1 = avg_class.average(biases1)
        weights2 = avg_class.average(weights2)
        biases2 = avg_class.average(biases2)

    layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(layer1, weights2) + biases2
def train(mnist):
    """Train the two-layer network on ``mnist`` and print its accuracy.

    The graph combines three techniques:
      * L2 regularization of both weight matrices, added to the loss;
      * an exponentially decaying learning rate driven by ``global_step``;
      * exponential moving averages of all trainable variables, which are
        the values used when accuracy is evaluated.

    Accuracy on the validation set is printed every 1000 steps, and accuracy
    on the test set is printed once after the last step.

    Args:
        mnist: Dataset object providing ``train.next_batch``,
            ``train.num_examples``, ``validation.images/labels`` and
            ``test.images/labels``.
    """
    # Placeholders for the input samples and their labels. Labels are reduced
    # with argmax over axis 1 below, so one row per sample is expected.
    x_ = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x_-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y_-input')

    # Initial parameter values.
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    # Step counter; not trainable, so it is excluded from the moving averages.
    global_step = tf.Variable(0, trainable=False)

    # Forward pass on the raw variables (used for the loss).
    y_nohd = inference(x_, None, weights1, biases1, weights2, biases2)

    # Moving averages of every trainable variable, plus the op updating them.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    # Forward pass on the averaged variables (used for evaluation).
    y_hd = inference(
        x_, variable_averages, weights1, biases1, weights2, biases2)

    # Softmax cross-entropy loss; the sparse variant takes class indices,
    # hence the argmax over the label rows.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y_nohd, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Add the L2 regularization loss of both weight matrices (biases are not
    # regularized).
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization

    # Exponentially decaying learning rate; the decay period is the number of
    # batches in one pass over the training set.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # One training iteration bundles two operations: the gradient step (with
    # regularization and decayed learning rate) and the update of the moving
    # averages of the parameters.
    train_op = tf.group(train_step, variable_averages_op)

    # Accuracy of the moving-average model's forward pass.
    correct_prediction = tf.equal(tf.argmax(y_hd, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Run the training loop.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # Validation set.
        validate_feed = {
            x_: mnist.validation.images,
            y_: mnist.validation.labels,
        }
        # Test set.
        test_feed = {
            x_: mnist.test.images,
            y_: mnist.test.labels,
        }

        for i in range(TRANING_STEPS):
            if i % 1000 == 0:
                # Report validation accuracy every 1000 steps.
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("%d次後=>正確率%g" % (i, validate_acc))
            # Draw this step's batch and train on it.
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x_: xs, y_: ys})

        # After TRANING_STEPS steps, measure accuracy on the test set.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("正確率:%g" % test_acc)
def main(argv=None):
    """Load the MNIST data set and run training on it.

    The data is read from (or stored under) ``/tmp/data`` with one-hot
    labels. ``argv`` is accepted for the ``tf.app.run()`` entry-point
    convention and is not used here.
    """
    train(input_data.read_data_sets("/tmp/data", one_hot=True))
# Script entry point: hand control to TensorFlow's app runner, which calls
# main() defined in this module.
if __name__ == '__main__':
    tf.app.run()
隱藏層使用了非線性激活函數 ReLU,有助於緩解梯度消失問題。
tensorflow-正則化+指數衰減+滑動平均