
Tensorflow (5): Classifying the MNIST Dataset with a CNN

In Tensorflow (2), MNIST was classified with a single-layer neural network: plain gradient descent with a learning rate of 0.2, run for 100 iterations, reached about 92% accuracy. That network is simple enough that such a large learning rate causes neither exploding nor vanishing gradients, but in a more complex network, such as the three-layer CNN used here, a learning rate of 0.2 is too large.

This post combines the convolutional network model from Tensorflow (3) with the TensorBoard techniques from Tensorflow (4). The network has three layers in total: two convolutional layers and one fully connected layer. Since the feature maps are normally pooled right after each convolution, the pooling layers are counted as part of the convolutional layers; the code implements convolution and pooling separately, but when counting the network's layers each conv+pool pair is treated as a single convolutional layer. (A shape trace after the forward_propagation code below shows how the layer sizes work out.)

The details are all covered in the previous two posts, so the code is pasted directly here, with comments wherever something needs explaining:

#Imports (math is needed later by random_mini_batches)
import math
import numpy as np
import h5py
import tensorflow as tf

#MNIST data
#Note that the input format differs from the single-layer network:
#a CNN does not need the samples flattened into an (m, n) matrix,
#i.e. the pixel features keep their 2-D spatial layout.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data',one_hot = True)
train_x = mnist.train.images
train_y = mnist.train.labels

test_x = mnist.test.images
test_y = mnist.test.labels #(55000, 10)

train_x = train_x.reshape([-1,28,28,1]) #(55000, 28, 28, 1)
test_x = test_x.reshape([-1,28,28,1]) # (10000, 28, 28, 1)


#Helper that attaches all the standard summaries for a variable
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)


#Create placeholders
def create_placeholders(n_H0,n_W0,n_C0,n_y):
    with tf.name_scope('input'):
        X = tf.placeholder(shape=[None,n_H0,n_W0,n_C0],dtype = tf.float32,name='x_input')
        Y = tf.placeholder(shape=[None,n_y],dtype = tf.float32,name='y_input')
    return X,Y


#Forward propagation
def forward_propagation(X):
    tf.set_random_seed(1)
    
    #First convolutional layer: conv-relu-pooling
    with tf.name_scope('layer_conv_1'):
        with tf.name_scope('weight1'):
            W1 = tf.get_variable('weight1',[4,4,1,8],initializer = tf.contrib.layers.xavier_initializer(seed = 0))
            variable_summaries(W1)
        with tf.name_scope('conv1'):
            Z1 = tf.nn.conv2d(X,W1,strides=[1,1,1,1],padding='SAME')
        with tf.name_scope('activation_relu'):
            A1 = tf.nn.relu(Z1)
        with tf.name_scope('pooling1'):
            P1 = tf.nn.max_pool(A1,ksize=[1,8,8,1],strides=[1,8,8,1],padding='SAME')
    
    #Second convolutional layer: conv-relu-pooling
    with tf.name_scope('layer_conv_2'):
        with tf.name_scope('weight2'):
            W2 = tf.get_variable('weight2',[2,2,8,16],initializer = tf.contrib.layers.xavier_initializer(seed = 0))
            variable_summaries(W2)
        with tf.name_scope('conv2'):
            Z2 = tf.nn.conv2d(P1,W2,strides=[1,1,1,1],padding='SAME')
        with tf.name_scope('activation_relu'):
            A2 = tf.nn.relu(Z2)
        with tf.name_scope('pooling2'):
            P2 = tf.nn.max_pool(A2,ksize=[1,4,4,1],strides=[1,4,4,1],padding='SAME')
    
    #Third layer, fully connected: flatten-fc
    with tf.name_scope('layer_FC_3'):
        with tf.name_scope('pooling2_flat'):
            P2 = tf.contrib.layers.flatten(P2)
        with tf.name_scope('FC3'):
            Z3 = tf.contrib.layers.fully_connected(P2,num_outputs=10,activation_fn=None)
    
    return Z3
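
As a quick sanity check on the shapes (a small sketch of my own, assuming the two functions above have already been defined): with SAME padding, the 8x8/stride-8 pool shrinks the 28x28 feature maps to 4x4, and the 4x4/stride-4 pool shrinks them further to 1x1, so the vector fed to the fully connected layer has only 16 features.

#Shape trace (my addition, not part of the original code):
#  X  : (m, 28, 28, 1)
#  Z1 : (m, 28, 28, 8)   conv 4x4x1x8, stride 1, SAME
#  P1 : (m, 4, 4, 8)     max pool 8x8, stride 8
#  Z2 : (m, 4, 4, 16)    conv 2x2x8x16, stride 1, SAME
#  P2 : (m, 1, 1, 16)    max pool 4x4, stride 4  -> flattened to (m, 16)
#  Z3 : (m, 10)          fully connected output
tf.reset_default_graph()
X_check, _ = create_placeholders(28, 28, 1, 10)
Z3_check = forward_propagation(X_check)
print(Z3_check.get_shape())   # expected: (?, 10)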



#Compute the cost function
def compute_cost(Z3,Y):
    with tf.name_scope('cost_cross_entry'):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z3,labels=Y))
        tf.summary.scalar('cost_cross_entry',cost)
    return cost
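
A side note from me: on newer TF 1.x releases (roughly 1.5 and later, an assumption about your version), tf.nn.softmax_cross_entropy_with_logits prints a deprecation warning; the _v2 op is a drop-in replacement here, because the one-hot labels come from a placeholder and the extra gradient it allows into the labels is never used.

#Alternative cost for newer TF 1.x versions (assumed >= 1.5); behaviour is identical here
def compute_cost_v2(Z3, Y):
    with tf.name_scope('cost_cross_entry'):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=Z3, labels=Y))
        tf.summary.scalar('cost_cross_entry', cost)
    return cost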



#Generate random mini-batches
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    """
    Creates a list of random minibatches from (X, Y)
    
    Arguments:
    X -- input data, of shape (input size, number of examples) (m, Hi, Wi, Ci)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples) (m, n_y)
    mini_batch_size - size of the mini-batches, integer
    seed -- this is only for the purpose of grading, so that you're "random minibatches are the same as ours.
    
    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
    """
    
    m = X.shape[0]                  # number of training examples
    mini_batches = []
    np.random.seed(seed)
    
    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[permutation,:,:,:]
    shuffled_Y = Y[permutation,:]
 
    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitionning
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[k * mini_batch_size : k * mini_batch_size + mini_batch_size,:,:,:]
        mini_batch_Y = shuffled_Y[k * mini_batch_size : k * mini_batch_size + mini_batch_size,:]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[num_complete_minibatches * mini_batch_size : m,:,:,:]
        mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size : m,:]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    return mini_batches
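
A quick usage example of the helper (my addition), just to make the return format concrete: with 55000 training images and a batch size of 100 there are exactly 550 full mini-batches and no remainder batch.

#Usage sketch for random_mini_batches (not part of the original post)
batches = random_mini_batches(train_x, train_y, mini_batch_size=100, seed=0)
first_x, first_y = batches[0]
print(len(batches))      # 550
print(first_x.shape)     # (100, 28, 28, 1)
print(first_y.shape)     # (100, 10)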




#Putting the model together
from tensorflow.python.framework import ops
def model(train_x,train_y,test_x,test_y,learning_rate = 0.001,iteration=51,batch_size=100,print_cost=True):
   
    # Reset the default graph so the model can be rerun without overwriting tf variables
    ops.reset_default_graph() 
    tf.set_random_seed(1)
    
    costs = []
    seed = 3
    
    m,n_H0,n_W0,n_C0 = train_x.shape
    n_y = train_y.shape[1]
    
    X,Y = create_placeholders(n_H0,n_W0,n_C0,n_y)
    Z3 = forward_propagation(X)
    cost = compute_cost(Z3,Y)
    
    #Define the optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    
    #Define the accuracy metric
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(Z3,1),tf.argmax(Y,1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction,'float'))
            tf.summary.scalar('accuracy',accuracy)
            
    #Merge all the summaries
    merge = tf.summary.merge_all()
    
    with tf.Session() as sess:
        
        sess.run(tf.global_variables_initializer())
        train_writer = tf.summary.FileWriter('D:/jupyproject/tensorflow/logs',sess.graph)
        
        for epoch in range(iteration):
            
            seed = seed+1
            batches = random_mini_batches(train_x,train_y,batch_size,seed)
            
            for batch in batches:
                (mini_x,mini_y)=batch
                summary,_ ,train_acc= sess.run([merge,optimizer,accuracy],feed_dict={X:mini_x,Y:mini_y})
                
            train_writer.add_summary(summary,epoch)
            
            if print_cost ==  True and epoch % 5 == 0:
                test_acc = sess.run(accuracy,feed_dict={X:test_x,Y:test_y})
                print('acc after epoch %i : %f   %f' %(epoch,train_acc,test_acc))


            

Train a model and check the test accuracy. Since training takes quite a while, only 51 epochs were run:

model(train_x,train_y,test_x,test_y)

#Results: some overfitting is visible
acc after epoch 0 : 0.690000   0.716000
acc after epoch 5 : 0.920000   0.914400
acc after epoch 10 : 0.890000   0.934400
acc after epoch 15 : 0.960000   0.942600
acc after epoch 20 : 0.950000   0.949100
acc after epoch 25 : 0.950000   0.949400
acc after epoch 30 : 0.980000   0.952300
acc after epoch 35 : 0.940000   0.950500
acc after epoch 40 : 0.970000   0.954300
acc after epoch 45 : 0.970000   0.956100
acc after epoch 50 : 0.970000   0.956500

Now let's have a look at TensorBoard:
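
(A note from me: to view the graph and summaries, point TensorBoard at the log directory used by the FileWriter in model() and open the URL it prints, e.g.:)

tensorboard --logdir D:/jupyproject/tensorflow/logs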

To address the overfitting, let's try adding dropout and see whether it helps.

#Define a placeholder for the dropout keep probability
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32,name='dropout')
    tf.summary.scalar('dropout_keep_probability', keep_prob)

#Pick the layer to drop inside forward_propagation; since there is only one fully connected layer,
#dropout is applied to the flattened output of the second conv layer's pooling
    with tf.name_scope('layer_FC_3'):
        with tf.name_scope('pooling2_flat'):
            P2 = tf.contrib.layers.flatten(P2)
            P2_dropped = tf.nn.dropout(P2, keep_prob)
        with tf.name_scope('FC3'):
            Z3 = tf.contrib.layers.fully_connected(P2_dropped,num_outputs=10,activation_fn=None)
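
One detail the snippets above leave implicit (the wiring here is my assumption, not shown in the original code): keep_prob has to be created after ops.reset_default_graph() inside model(), otherwise the reset wipes it out; it then has to be passed into forward_propagation and fed in every sess.run call. A rough sketch of those changes, with 0.5 as an assumed training keep probability:

#Sketch of the changes inside model(); keep_prob = 0.5 for training is an assumed value
X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32, name='dropout')
    tf.summary.scalar('dropout_keep_probability', keep_prob)
Z3 = forward_propagation(X, keep_prob)   # forward_propagation now takes keep_prob as an argument

# ... inside the training loop:
summary, _, train_acc = sess.run([merge, optimizer, accuracy],
                                 feed_dict={X: mini_x, Y: mini_y, keep_prob: 0.5})

# ... and when computing test accuracy, disable dropout:
test_acc = sess.run(accuracy, feed_dict={X: test_x, Y: test_y, keep_prob: 1.0})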



Results:
acc after epoch 0 : 0.680000   0.732400
acc after epoch 5 : 0.830000   0.901900
acc after epoch 10 : 0.930000   0.927500
acc after epoch 15 : 0.890000   0.934100
acc after epoch 20 : 0.940000   0.935800
acc after epoch 25 : 0.860000   0.938500
acc after epoch 30 : 0.890000   0.940900
acc after epoch 35 : 0.910000   0.943300
acc after epoch 40 : 0.920000   0.943700
acc after epoch 45 : 0.920000   0.943500
acc after epoch 50 : 0.940000   0.945800

The results show that dropout does reduce the overfitting, but the training accuracy drops as well. Usually a network has several fully connected layers and dropout is applied there, and more iterations would also give better results; my machine trains far too slowly, so I will leave that for when I install the GPU build.
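
For reference, a hedged sketch of what a "several fully connected layers, then dropout" variant of the third block could look like; the 128-unit hidden layer is an arbitrary size of my own, not something benchmarked in this post:

    #Sketch only (my addition): a hidden FC layer with ReLU, dropout on its output, then the 10-way output layer
    with tf.name_scope('layer_FC_3'):
        with tf.name_scope('pooling2_flat'):
            P2_flat = tf.contrib.layers.flatten(P2)
        with tf.name_scope('FC3_hidden'):
            A3 = tf.contrib.layers.fully_connected(P2_flat, num_outputs=128)  # ReLU is the default activation
            A3_dropped = tf.nn.dropout(A3, keep_prob)
        with tf.name_scope('FC3_out'):
            Z3 = tf.contrib.layers.fully_connected(A3_dropped, num_outputs=10, activation_fn=None)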

I also tried applying dropout in the first two convolutional layers, and found that dropping there hurts the network noticeably more than dropping in the fully connected layer: with dropout on the conv layers the test accuracy only reached 92%. Of course this is specific to my model; in a larger model, dropout on the convolutional layers might not damage performance as much.
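
For completeness, dropout in a convolutional block would be wired roughly like this (an illustrative sketch only; the exact placement and keep probability behind the 92% figure above are not shown in this post):

        #Illustrative sketch: dropout on the pooled feature maps of the first conv block
        with tf.name_scope('pooling1'):
            P1 = tf.nn.max_pool(A1,ksize=[1,8,8,1],strides=[1,8,8,1],padding='SAME')
            P1 = tf.nn.dropout(P1, keep_prob)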