1. 程式人生 > >tensorflow中optimizer自動訓練簡介和選擇訓練variable的方法







import tensorflow as tf

# Demo 1: let the optimizer drive one scalar variable toward a fixed label.
# `label` is the constant target value.
label = tf.constant(1, dtype=tf.float32)
# The only variable in this graph; it starts at 3.
prediction_to_train = tf.Variable(3, dtype=tf.float32)

# Squared-error loss and a gradient-descent update op (learning rate 0.01).
manual_compute_loss = tf.square(prediction_to_train - label)
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_step = optimizer.minimize(manual_compute_loss)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before they can be read or updated.
    sess.run(init)
    for _ in range(100):
        # Print before updating so the first line shows the initial state.
        print('variable is ', sess.run(prediction_to_train), ' and the loss is ',sess.run(manual_compute_loss))
        # Apply one gradient-descent update; without it nothing ever changes.
        sess.run(train_step)


variable is  3.0  and the loss is  4.0
variable is  2.96  and the loss is  3.8416002
variable is  2.9208  and the loss is  3.6894724
variable is  2.882384  and the loss is  3.5433698
variable is  2.8447363  and the loss is  3.403052
variable is  2.8078415  and the loss is  3.268291

variable is  2.0062745  and the loss is  1.0125883
variable is  1.986149  and the loss is  0.9724898
variable is  1.966426  and the loss is  0.9339792


variable is  1.0000029  and the loss is  8.185452e-12
variable is  1.0000029  and the loss is  8.185452e-12
variable is  1.0000029  and the loss is  8.185452e-12
variable is  1.0000029  and the loss is  8.185452e-12
variable is  1.0000029  and the loss is  8.185452e-12




# Demo: minimize() with no var_list updates every trainable variable.
# w1 and w2 are variables; w3 is a constant, so the optimizer cannot change it.
label = tf.constant(1, dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
w1 = tf.Variable(4, dtype=tf.float32)
w2 = tf.Variable(4, dtype=tf.float32)
w3 = tf.constant(4, dtype=tf.float32)

y_predict = w1*x + w2*x + w3*x

# Squared-error loss and a gradient-descent update op (learning rate 0.01).
make_up_loss = tf.square(y_predict - label)
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_step = optimizer.minimize(make_up_loss)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before they can be read or updated.
    sess.run(init)
    for _ in range(100):
        # Read the current weights and loss for the fed input x = 1.
        w1_, w2_, w3_, loss_ = sess.run([w1, w2, w3, make_up_loss], feed_dict={x: 1})
        print('variable is w1:',w1_,' w2:',w2_,' w3:',w3_, ' and the loss is ',loss_)
        # Apply one update; the loss depends on the placeholder, so feed x again.
        sess.run(train_step, feed_dict={x: 1})


variable is w1: -1.3765227  w2: -1.3765227  w3: 4.0  and the loss is  0.06098667
variable is w1: -1.3814617  w2: -1.3814617  w3: 4.0  and the loss is  0.056205332
variable is w1: -1.3862033  w2: -1.3862033  w3: 4.0  and the loss is  0.051798765
variable is w1: -1.3907552  w2: -1.3907552  w3: 4.0  and the loss is  0.047737725
variable is w1: -1.3951249  w2: -1.3951249  w3: 4.0  and the loss is  0.043995135
variable is w1: -1.3993199  w2: -1.3993199  w3: 4.0  and the loss is  0.04054594
variable is w1: -1.4033471  w2: -1.4033471  w3: 4.0  and the loss is  0.03736715


# Demo: restrict training to w2 by passing var_list to minimize().
# w1 is a variable but is left out of var_list; w3 is a constant.
label = tf.constant(1, dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
w1 = tf.Variable(4, dtype=tf.float32)
w2 = tf.Variable(4, dtype=tf.float32)
w3 = tf.constant(4, dtype=tf.float32)

y_predict = w1*x + w2*x + w3*x

# Squared-error loss; only the variables listed in var_list get an update op.
make_up_loss = tf.square(y_predict - label)
optimizer = tf.train.GradientDescentOptimizer(0.01)
# var_list is documented as a list/tuple of variables, so wrap w2 in a list.
train_step = optimizer.minimize(make_up_loss, var_list=[w2])

init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before they can be read or updated.
    sess.run(init)
    for _ in range(500):
        # Read the current weights and loss for the fed input x = 1.
        w1_, w2_, w3_, loss_ = sess.run([w1, w2, w3, make_up_loss], feed_dict={x: 1})
        print('variable is w1:',w1_,' w2:',w2_,' w3:',w3_, ' and the loss is ',loss_)
        # Apply one update to w2; the loss depends on the placeholder, so feed x.
        sess.run(train_step, feed_dict={x: 1})
variable is w1: 4.0  w2: -6.99948  w3: 4.0  and the loss is  2.7063857e-07
variable is w1: 4.0  w2: -6.9994903  w3: 4.0  and the loss is  2.5983377e-07
variable is w1: 4.0  w2: -6.9995003  w3: 4.0  and the loss is  2.4972542e-07
variable is w1: 4.0  w2: -6.9995103  w3: 4.0  and the loss is  2.398176e-07
variable is w1: 4.0  w2: -6.9995203  w3: 4.0  and the loss is  2.3011035e-07
variable is w1: 4.0  w2: -6.99953  w3: 4.0  and the loss is  2.2105178e-07
variable is w1: 4.0  w2: -6.9995394  w3: 4.0  and the loss is  2.1217511e-07


ValueError: No variables to optimize.


# demo2.2: another way to build var_list - collect variables by name scope.

label = tf.constant(1, dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
w1 = tf.Variable(4, dtype=tf.float32)
# Only w2 is created inside this scope, so filtering by the scope name selects it.
with tf.name_scope(name='selected_variable_to_trian'):
    w2 = tf.Variable(4, dtype=tf.float32)
w3 = tf.constant(4, dtype=tf.float32)

y_predict = w1*x + w2*x + w3*x

# Made-up cubic loss and a gradient-descent optimizer (learning rate 0.01).
make_up_loss = (y_predict - label)**3
optimizer = tf.train.GradientDescentOptimizer(0.01)

# Fetch the trainable variables whose names fall under the scope above.
output_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='selected_variable_to_trian')
train_step = optimizer.minimize(make_up_loss, var_list=output_vars)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before they can be read or updated.
    sess.run(init)
    for _ in range(3000):
        # Read the current weights and loss for the fed input x = 1.
        w1_, w2_, w3_, loss_ = sess.run([w1, w2, w3, make_up_loss], feed_dict={x: 1})
        print('variable is w1:',w1_,' w2:',w2_,' w3:',w3_, ' and the loss is ',loss_)
        # Apply one update to the selected variables; feed x for the loss.
        sess.run(train_step, feed_dict={x: 1})
variable is w1: 4.0  w2: -6.988893  w3: 4.0  and the loss is  1.3702081e-06
variable is w1: 4.0  w2: -6.988897  w3: 4.0  and the loss is  1.3687968e-06
variable is w1: 4.0  w2: -6.9889007  w3: 4.0  and the loss is  1.3673865e-06
variable is w1: 4.0  w2: -6.9889045  w3: 4.0  and the loss is  1.3659771e-06
variable is w1: 4.0  w2: -6.9889083  w3: 4.0  and the loss is  1.3645688e-06
variable is w1: 4.0  w2: -6.988912  w3: 4.0  and the loss is  1.3631613e-06
variable is w1: 4.0  w2: -6.988916  w3: 4.0  and the loss is  1.3617548e-06
variable is w1: 4.0  w2: -6.9889197  w3: 4.0  and the loss is  1.3603493e-06



# demo2.4: another way to keep a variable from being trained - trainable=False.

label = tf.constant(1, dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
# trainable=False keeps w1 out of the TRAINABLE_VARIABLES collection.
w1 = tf.Variable(4, dtype=tf.float32, trainable=False)
w2 = tf.Variable(4, dtype=tf.float32)
w3 = tf.constant(4, dtype=tf.float32)

y_predict = w1*x + w2*x + w3*x

# Made-up cubic loss and a gradient-descent optimizer (learning rate 0.01).
make_up_loss = (y_predict - label)**3
optimizer = tf.train.GradientDescentOptimizer(0.01)

# Collect the trainable variables and hand them to minimize() explicitly.
output_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
train_step = optimizer.minimize(make_up_loss, var_list=output_vars)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before they can be read or updated.
    sess.run(init)
    for _ in range(3000):
        # Read the current weights and loss for the fed input x = 1.
        w1_, w2_, w3_, loss_ = sess.run([w1, w2, w3, make_up_loss], feed_dict={x: 1})
        print('variable is w1:',w1_,' w2:',w2_,' w3:',w3_, ' and the loss is ',loss_)
        # Apply one update; the loss depends on the placeholder, so feed x.
        sess.run(train_step, feed_dict={x: 1})


# demo2.3: another way to keep a variable from being trained - mark it
# trainable=False and call minimize() without any var_list.

label = tf.constant(1, dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
# trainable=False keeps w1 out of the variables minimize() picks up by default.
w1 = tf.Variable(4, dtype=tf.float32, trainable=False)
with tf.name_scope(name='selected_variable_to_trian'):
    w2 = tf.Variable(4, dtype=tf.float32)
w3 = tf.constant(4, dtype=tf.float32)

y_predict = w1*x + w2*x + w3*x

# Made-up cubic loss and a gradient-descent optimizer (learning rate 0.01).
make_up_loss = (y_predict - label)**3
optimizer = tf.train.GradientDescentOptimizer(0.01)

# No var_list: minimize() falls back to the graph's trainable variables.
train_step = optimizer.minimize(make_up_loss)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before they can be read or updated.
    sess.run(init)
    for _ in range(3000):
        # Read the current weights and loss for the fed input x = 1.
        w1_, w2_, w3_, loss_ = sess.run([w1, w2, w3, make_up_loss], feed_dict={x: 1})
        print('variable is w1:',w1_,' w2:',w2_,' w3:',w3_, ' and the loss is ',loss_)
        # Apply one update; the loss depends on the placeholder, so feed x.
        sess.run(train_step, feed_dict={x: 1})




# demo2.4: combining compute_gradients() and apply_gradients().

label = tf.constant(1, dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
w1 = tf.Variable(4, dtype=tf.float32, trainable=False)
w2 = tf.Variable(4, dtype=tf.float32)
# w3 is a trainable variable here, but it is not listed in var_list below.
w3 = tf.Variable(4, dtype=tf.float32)

y_predict = w1*x + w2*x + w3*x

# Made-up cubic loss and a gradient-descent optimizer (learning rate 0.01).
make_up_loss = (y_predict - label)**3
optimizer = tf.train.GradientDescentOptimizer(0.01)

# Step 1: build the (gradient, variable) pairs for w2 only.
# var_list is documented as a list/tuple of variables, so wrap w2 in a list.
w2_gradient = optimizer.compute_gradients(loss=make_up_loss, var_list=[w2])
# Step 2: turn those pairs into the update op.
train_step = optimizer.apply_gradients(grads_and_vars=w2_gradient)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Variables must be initialized before they can be read or updated.
    sess.run(init)
    for _ in range(300):
        # Fetch the weights, the loss and the (gradient, variable) pairs for x = 1.
        w1_, w2_, w3_, loss_, w2_gradient_ = sess.run([w1, w2, w3, make_up_loss, w2_gradient], feed_dict={x: 1})
        print('variable is w1:',w1_,' w2:',w2_,' w3:',w3_, ' and the loss is ',loss_)
        # Apply one update to w2; the gradient depends on the placeholder, so feed x.
        sess.run(train_step, feed_dict={x: 1})

具體的learning rate、step、計算公式和手動梯度下降實現:





L = (w*x - y)^2,因此 dL/dw = 2*(w*x - y)*x = 2*w*x^2 - 2*y*x


w1 = w0-η*dL/dw|w=w0

w2 = w1 - η*dL/dw|w=w1

w3 = w2 - η*dL/dw|w=w2





所以,本例x=1,y=3,dl/dw巧合的等於2w-2y,也就是二倍的prediction和label的差距。learning rate=1會導致w圍繞正確的值來回徘徊,完全不收斂,這樣寫主要是方便演示計算。改小learning rate 並增加迴圈次數就能收斂了。

# demo4: manual gradient descent in TensorFlow, without an Optimizer.
# y is the label; the prediction is p = w * x.
y = tf.constant(3, dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
w = tf.Variable(2, dtype=tf.float32)
p = w*x

# Squared-error loss and its gradient with respect to w.
l = tf.square(p - y)
# tf.gradients returns a list with one entry per variable asked for.
g = tf.gradients(l, w)
# Learning rate 1 is chosen to make the arithmetic easy to follow; the
# accompanying text notes that w then oscillates around the target.
# A smaller value such as 0.11, with more iterations, converges.
learning_rate = tf.constant(1, dtype=tf.float32)
init = tf.global_variables_initializer()

# One gradient-descent step: w <- w - learning_rate * dL/dw.
update = tf.assign(w, w - learning_rate * g[0])

with tf.Session() as sess:
    # Variables must be initialized before they can be read or assigned.
    sess.run(init)
    print(sess.run([g,p,w], {x: 1}))
    for _ in range(5):
        # Read the state before the update so the first line shows the initial w.
        w_, g_, l_ = sess.run([w, g, l], feed_dict={x: 1})
        print('variable is w:',w_, ' g is ',g_,'  and the loss is ',l_)

        # Apply the hand-written update step.
        sess.run(update, feed_dict={x: 1})


learning rate=1

[[-2.0], 2.0, 2.0]
variable is w: 2.0  g is  [-2.0]   and the loss is  1.0
variable is w: 4.0  g is  [2.0]   and the loss is  1.0
variable is w: 2.0  g is  [-2.0]   and the loss is  1.0
variable is w: 4.0  g is  [2.0]   and the loss is  1.0
variable is w: 2.0  g is  [-2.0]   and the loss is  1.0

縮小learning rate

variable is w: 2.9964619  g is  [-0.007575512]   and the loss is  1.4347095e-05
variable is w: 2.996695  g is  [-0.0070762634]   and the loss is  1.2518376e-05
variable is w: 2.996913  g is  [-0.0066099167]   and the loss is  1.0922749e-05
variable is w: 2.9971166  g is  [-0.0061740875]   and the loss is  9.529839e-06
variable is w: 2.9973066  g is  [-0.0057668686]   and the loss is  8.314193e-06
variable is w: 2.9974842  g is  [-0.0053868294]   and the loss is  7.2544826e-06
variable is w: 2.9976501  g is  [-0.0050315857]   and the loss is  6.3292136e-06
variable is w: 2.997805  g is  [-0.004699707]   and the loss is  5.5218115e-06
variable is w: 2.9979498  g is  [-0.004389763]   and the loss is  4.8175043e-06
variable is w: 2.998085  g is  [-0.0041003227]   and the loss is  4.2031616e-06
variable is w: 2.9982114  g is  [-0.003829956]   and the loss is  3.6671408e-06
variable is w: 2.9983294  g is  [-0.0035772324]   and the loss is  3.1991478e-06

