基於TensorFlow的最近鄰(NN)分類器——以MNIST識別為例

基於TensorFlow的最近鄰(NN)分類器——以MNIST識別為例

一、最近鄰分類理論

二、TF在CPU上實現NN分類

具體程式碼如下:

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

'''========load data========'''
# NOTE(review): this line was commented out in the original, yet `mnist` is
# used below — it must run for the script to work. For the 'MNIST_data'
# directory setup, see the earlier articles in this series.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Number of training samples (kept for reference):
# train_nums = mnist.train.num_examples

# Read ALL training and test samples:
# X_train = mnist.train.images
# X_test = mnist.test.images
# Y_train = mnist.train.labels
# Y_test = mnist.test.labels

# Batch-read a subset of samples (1000 train / 200 test):
X_train, Y_train = mnist.train.next_batch(1000)
X_test, Y_test = mnist.test.next_batch(200)

'''========0. define constants========'''
insize = 784  # input size (28*28 flattened MNIST image)

# Graph input placeholders: the whole training set and one test image.
xs = tf.placeholder(tf.float32, [None, insize])
xst = tf.placeholder(tf.float32, [insize])

# Nearest-neighbour using the L1 distance.
# L1: dist = sum(|X1-X2|)  or  L2: dist = sqrt(sum(|X1-X2|^2))
dist = tf.reduce_sum(tf.abs(tf.add(xs, tf.negative(xst))),
                     reduction_indices=1)
# alternatively: dist = tf.reduce_sum(tf.abs(tf.subtract(xs, xst)), axis=1)

# Prediction: index of the minimum distance; compare that training sample's
# label with the true label.  (tf.arg_min is deprecated; use tf.argmin.)
index = tf.argmin(dist, 0)

# Initialize all variables.
init = tf.global_variables_initializer()

# Correct-prediction counter.
Accuracy = 0

# Run the session.
with tf.Session() as sess:
    sess.run(init)
    # Test samples can only be classified one at a time.
    for i in range(len(X_test)):
        # print('Dist=', sess.run(dist, feed_dict={xs: X_train, xst: X_test[i, :]}))
        nn_id = sess.run(index, feed_dict={xs: X_train, xst: X_test[i, :]})
        # Convert one-hot labels to class indices for comparison.
        Predict_label = np.argmax(Y_train[nn_id])
        True_label = np.argmax(Y_test[i])
        print("Test Sample", i, "Prediction label:", Predict_label,
              "True Class label:", True_label)
        # Accumulate accuracy.
        if Predict_label == True_label:
            Accuracy += 1
    print("Accuracy=", Accuracy / len(X_test))

方法二:將其讀資料和NN分類單獨寫出函式,方便後期呼叫

import numpy as np
import tensorflow as tf
def load_mnist_data(filename, isbatch=0, train_nums=1000, test_nums=200):
    """Load MNIST and return (X_train, Y_train, X_test, Y_test).

    filename   -- directory holding the MNIST data files
    isbatch    -- 1: draw a random batch from each split; any other value:
                  return the full train/test sets
    train_nums -- training batch size (used only when isbatch == 1)
    test_nums  -- test batch size (used only when isbatch == 1)
    """
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets(filename, one_hot=True)
    if isbatch == 1:
        # Randomly sample a batch from each split.
        X_train, Y_train = mnist.train.next_batch(train_nums)
        X_test, Y_test = mnist.test.next_batch(test_nums)
    else:
        # Return every sample in each split.
        X_train, Y_train = mnist.train.images, mnist.train.labels
        X_test, Y_test = mnist.test.images, mnist.test.labels
    return X_train, Y_train, X_test, Y_test

def NN_Classifier(X_train, Y_train, X_test, Y_test, dims=784, dist_metric='L1'):
    """1-nearest-neighbour classifier evaluated over a test set.

    X_train, Y_train -- training images [n, dims] and one-hot labels
    X_test, Y_test   -- test images [m, dims] and one-hot labels
    dims             -- flattened image size (default 784 = 28*28)
    dist_metric      -- 'L1' (sum of absolute differences, default) or
                        'L2' (Euclidean distance)

    Returns the number of correctly classified test samples (prints the
    accuracy ratio as a side effect).
    """
    # Graph input placeholders: the whole training set and one test image.
    xs = tf.placeholder(tf.float32, [None, dims])
    xst = tf.placeholder(tf.float32, [dims])

    # Distance from the test image to every training image.
    # L1: dist = sum(|X1-X2|)   L2: dist = sqrt(sum((X1-X2)^2))
    # Fix: the original ignored dist_metric and always used L1.
    diff = tf.subtract(xs, xst)
    if dist_metric == 'L2':
        dist = tf.sqrt(tf.reduce_sum(tf.square(diff), reduction_indices=1))
    else:
        dist = tf.reduce_sum(tf.abs(diff), reduction_indices=1)

    # Prediction: index of the minimum distance; that training sample's
    # label is compared with the true label below.
    # (tf.arg_min is deprecated; tf.argmin is the supported spelling.)
    index = tf.argmin(dist, 0)

    # Initialize all variables.
    init = tf.global_variables_initializer()

    # Correct-prediction counter.
    Accuracy = 0

    # Run the session.
    with tf.Session() as sess:
        sess.run(init)
        # Test samples can only be classified one at a time.
        for i in range(len(X_test)):
            # `nn_id` instead of `id`: avoid shadowing the builtin.
            nn_id = sess.run(index, feed_dict={xs: X_train, xst: X_test[i, :]})
            # Convert one-hot labels to class indices for comparison.
            Predict_label = np.argmax(Y_train[nn_id])
            True_label = np.argmax(Y_test[i])

            print("Test Sample", i, "Prediction label:", Predict_label,
                  "True Class label:", True_label)

            # Accumulate accuracy.
            if Predict_label == True_label:
                Accuracy += 1
        print("Accuracy=", Accuracy / len(X_test))

    return Accuracy

if __name__ == '__main__':
    # Draw a 1000/200 train/test batch and run the L1 nearest-neighbour demo.
    data = load_mnist_data("MNIST_data", isbatch=1,
                           train_nums=1000, test_nums=200)
    X_train, Y_train, X_test, Y_test = data
    Accuracy = NN_Classifier(X_train, Y_train, X_test, Y_test,
                             dims=784, dist_metric='L1')