Implementing a Neural Network with Python's NumPy for Handwritten Digit Recognition

First we read the data files. The four files (train-images-idx3-ubyte and so on) are the MNIST dataset. Put them in a folder named MNIST, and put the MNIST folder in the same directory as this .py file.

import numpy as np
import struct

train_images_idx3_ubyte_file = './MNIST/train-images-idx3-ubyte'
train_labels_idx1_ubyte_file = './MNIST/train-labels-idx1-ubyte'

test_images_idx3_ubyte_file = './MNIST/t10k-images-idx3-ubyte'
test_labels_idx1_ubyte_file = './MNIST/t10k-labels-idx1-ubyte'

The images in the dataset are stored in binary, so they have to be converted into the form NumPy needs. The following code performs that conversion.

bin_data holds the raw binary data; opening the file with 'rb' reads it in binary mode.

num_images (printed as "count") is the number of images: 60,000 in the training set and 10,000 in the test set for this experiment. size is the image size, 28*28 here.

The magic number at the start of the file is a descriptor of the file format. The format string passed to struct.unpack_from has two parts: '>' means big-endian byte order, and each 'I' stands for a 4-byte unsigned integer. You don't have to dig into the details here; if you're interested, look up the MNIST dataset format.

fmt_header = '>IIII' therefore reads four big-endian unsigned 32-bit integers: the magic number, the image count, and the row and column counts.
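As a quick illustration (a standalone sketch, not part of the original script), packing and then unpacking a fake header shows what '>IIII' does:

import struct

# Pack a fake header: magic 2051, 60000 images of 28x28, big-endian.
header = struct.pack('>IIII', 2051, 60000, 28, 28)
magic, count, rows, cols = struct.unpack_from('>IIII', header, 0)
print(magic, count, rows, cols)   # 2051 60000 28 28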

def decode_idx3_ubyte(idx3_ubyte_file):
    bin_data = open(idx3_ubyte_file, 'rb').read()

    offset = 0
    fmt_header = '>IIII'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)
    print ("magic:%d, count: %d, size: %d*%d" % (magic_number, num_images, num_rows, num_cols))

    image_size = num_rows * num_cols
    offset += struct.calcsize(fmt_header)
    fmt_image = '>' + str(image_size) + 'B'
    images = np.empty((num_images, num_rows, num_cols))
    for i in range(num_images):
        if (i + 1) % 10000 == 0:
            print("done %d" % (i + 1) + "pictures")
        images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols))
        offset += struct.calcsize(fmt_image)
    return images
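The per-image loop above is easy to follow but slow. For reference, here is a vectorized sketch (the function name decode_idx3_ubyte_fast is introduced here; it assumes the same file layout and the numpy/struct imports from the top of the script):

def decode_idx3_ubyte_fast(idx3_ubyte_file):
    bin_data = open(idx3_ubyte_file, 'rb').read()
    # Header: magic number, image count, rows, cols (4 big-endian uint32 = 16 bytes).
    magic_number, num_images, num_rows, num_cols = struct.unpack_from('>IIII', bin_data, 0)
    # Interpret everything after the header as unsigned bytes, one per pixel.
    pixels = np.frombuffer(bin_data, dtype=np.uint8, offset=16)
    return pixels.reshape(num_images, num_rows, num_cols).astype(np.float64)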

This part converts the labels from binary into the form NumPy needs.

def decode_idx1_ubyte(idx1_ubyte_file):
    bin_data = open(idx1_ubyte_file, 'rb').read()

    offset = 0
    fmt_header = '>II'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)
    print("magic:%d, num_images: %d" % (magic_number, num_images))

    offset += struct.calcsize(fmt_header)
    fmt_image = '>B'
    labels = np.empty(num_images)
    for i in range(num_images):
        if (i + 1) % 10000 == 0:
            print("done %d" % (i + 1) + "zhang")
        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
        offset += struct.calcsize(fmt_image)
    return labels
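Labels can be decoded the same way in one step (again a sketch introduced here, not the original code):

def decode_idx1_ubyte_fast(idx1_ubyte_file):
    bin_data = open(idx1_ubyte_file, 'rb').read()
    # Skip the 8-byte header (magic number + label count); one byte per label.
    return np.frombuffer(bin_data, dtype=np.uint8, offset=8).astype(np.float64)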
Load the data
def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    return decode_idx3_ubyte(idx_ubyte_file)


def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    return decode_idx1_ubyte(idx_ubyte_file)


def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    return decode_idx3_ubyte(idx_ubyte_file)


def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    return decode_idx1_ubyte(idx_ubyte_file)
Min-max normalization, and parameter initialization. Despite the function name initialize_with_zeros, the weights here use Xavier (Glorot) uniform initialization; only the biases start at zero.
def normalize_data(ima):
    # Min-max normalization: scale pixel values into [0, 1].
    a_max=np.max(ima)
    a_min=np.min(ima)
    for j in range(ima.shape[0]):
        ima[j]=(ima[j]-a_min)/(a_max-a_min)
    return ima
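The loop can also be written as a single vectorized expression; this equivalent sketch (the name normalize_data_vec is introduced here) does the same min-max scaling:

def normalize_data_vec(ima):
    # Identical min-max scaling, done in one NumPy operation.
    return (ima - np.min(ima)) / (np.max(ima) - np.min(ima))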
def initialize_with_zeros(n_x,n_h,n_y):
    np.random.seed(2)
    # Xavier (Glorot) uniform initialization for the weights; zeros for the biases.
    W1=np.random.uniform(-np.sqrt(6)/np.sqrt(n_x+n_h),np.sqrt(6)/np.sqrt(n_x+n_h),size=(n_h,n_x))
    b1=np.zeros((n_h,1))
    W2=np.random.uniform(-np.sqrt(6)/np.sqrt(n_y+n_h),np.sqrt(6)/np.sqrt(n_y+n_h),size=(n_y,n_h))
    b2=np.zeros((n_y,1))

    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters
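The uniform bound sqrt(6)/sqrt(n_in+n_out) is the Xavier (Glorot) heuristic, which keeps the scale of activations roughly constant across layers. A usage sketch with this experiment's sizes:

parameters = initialize_with_zeros(784, 32, 10)
print(parameters["W1"].shape, parameters["b1"].shape)  # (32, 784) (32, 1)
print(parameters["W2"].shape, parameters["b2"].shape)  # (10, 32) (10, 1)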
Forward propagation and computing the loss function
def forward_propagation(X,parameters):
    W1=parameters["W1"]
    b1=parameters["b1"]
    W2=parameters["W2"]
    b2=parameters["b2"]
    # Hidden layer: linear transform followed by tanh activation.
    Z1=np.dot(W1,X)+b1
    A1=np.tanh(Z1)
    # Output layer: linear transform followed by sigmoid activation.
    Z2=np.dot(W2,A1)+b2
    A2=sigmoid(Z2)
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return A2, cache
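A quick shape check (a sketch assuming the functions above are already defined) confirms that the forward pass maps one 784-dimensional image to 10 output activations:

params = initialize_with_zeros(784, 32, 10)
x = np.random.rand(784, 1)
a2, cache = forward_propagation(x, params)
print(a2.shape)  # (10, 1): one activation per digit class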

def costloss(A2,Y,parameters):
    # Binary cross-entropy averaged over the 10 output units.
    # t is a tiny constant that keeps log() away from zero.
    t=0.00000000001
    logprobs=np.multiply(np.log(A2+t),Y) + np.multiply(np.log(1-A2+t),(1-Y))
    cost=-np.sum(logprobs,axis=0,keepdims=True)/A2.shape[0]
    return cost
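In formula form the cost is -(1/n_y) * sum_k [ y_k*log(a_k) + (1-y_k)*log(1-a_k) ]. A tiny worked example (a sketch; with the sign convention above, a confident correct prediction gives a small positive cost):

A2 = np.full((10, 1), 0.01); A2[3] = 0.99   # network is confident the digit is 3
Y = np.zeros((10, 1)); Y[3] = 1             # true label is 3
print(costloss(A2, Y, None))                # roughly 0.01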
Backpropagation and parameter updates
def back_propagation(parameters,cache,X,Y):
    W1=parameters["W1"]
    W2=parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    # For a sigmoid output trained with cross-entropy, the output error is simply A2 - Y.
    dZ2=A2-Y
    dW2=np.dot(dZ2,A1.T)
    db2=np.sum(dZ2,axis=1,keepdims=True)
    # Derivative of tanh: tanh'(z) = 1 - tanh(z)^2 = 1 - A1^2.
    dZ1=np.dot(W2.T,dZ2)*(1-np.power(A1,2))
    dW1=np.dot(dZ1,X.T)
    db1=np.sum(dZ1,axis=1,keepdims=True)
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads
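These formulas can be verified with a finite-difference gradient check (a sketch introduced here, assuming the functions above are in scope; note that dZ2 = A2 - Y corresponds to the summed cross-entropy, without the 1/10 averaging used in costloss):

def grad_check_one_weight(X, Y, parameters, eps=1e-6):
    # Reference cost: cross-entropy summed over the 10 outputs.
    def summed_cost(params):
        A2, _ = forward_propagation(X, params)
        return -np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))
    _, cache = forward_propagation(X, parameters)
    grads = back_propagation(parameters, cache, X, Y)
    # Perturb a single weight up and down by eps.
    plus = {k: v.copy() for k, v in parameters.items()}
    minus = {k: v.copy() for k, v in parameters.items()}
    plus["W2"][0, 0] += eps
    minus["W2"][0, 0] -= eps
    numeric = (summed_cost(plus) - summed_cost(minus)) / (2 * eps)
    print(numeric, grads["dW2"][0, 0])  # the two values should nearly match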

def update_para(parameters, grads, learning_rate ):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    # print("learning_rate:",learning_rate)
    # sumdW1=np.sum(dW1,axis=1,keepdims=True)
    # print("shape of dw1:",dW1.shape)
    # print("sumdW1: ",sumdW1)
    W1=W1-learning_rate*dW1
    b1=b1-learning_rate*db1
    W2=W2-learning_rate*dW2
    b2=b2-learning_rate*db2

    # print("W1",W1)
    # print("W2",W2)
    # print("chakan...")
    # print("canshugengxin....")
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters
Define the sigmoid activation function and the other helpers. image2vector reshapes a 28*28 input image into a column vector. Note that the function named softmax below does not compute a real softmax; it just returns the index of the largest output (argmax), i.e. the predicted digit.
def sigmoid(x):
    s=1/(1+np.exp(-x))
    return s
def image2vector(image):
    v=np.reshape(image,[784,1])
    return v
def softmax(x):
    # Despite the name, this returns the argmax: the predicted digit.
    v=np.argmax(x)
    return v
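For reference, a real softmax looks like the sketch below (the name real_softmax is introduced here; the script does not need it, since taking the argmax of the sigmoid outputs already yields the predicted class):

def real_softmax(x):
    # Subtract the max before exponentiating for numerical stability.
    e = np.exp(x - np.max(x))
    return e / np.sum(e)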
Run the experiment and display the training progress. Training runs for 50,000 steps.

n_x is the size of the input image, 28*28 here. n_h is the number of neurons in the hidden layer. This experiment uses a two-layer network: one hidden layer and one output layer (the input layer is not counted).

In this experiment the hidden layer has 32 neurons; other sizes work too. With 32 neurons, 9119 test images are recognized correctly. With 10 neurons the count drops to 8619, so accuracy clearly decreases.

n_y is the number of output neurons. Since the digits run from 0 to 9, n_y=10. ii counts the test images that are recognized correctly.

Then training starts: the loop processes one image per step (stochastic gradient descent with batch size 1) until it finishes.

if __name__ == '__main__':
    train_images = load_train_images()
    train_labels = load_train_labels()
    test_images = load_test_images()
    test_labels = load_test_labels()

    ii=0
    n_x=28*28
    n_h=32
    n_y=10
    parameters=initialize_with_zeros(n_x,n_h,n_y)
    for i in range(50000):
        img_train=train_images[i]
        label_train1=train_labels[i]
        label_train=np.zeros((10,1))
        # Learning rate: 0.001, reduced slightly after the first 1000 steps.
        # (ttt is reset each iteration, so the 0.999 factor is not cumulative.)
        ttt=0.001
        if i>1000:
            ttt=ttt*0.999
        # One-hot encode the label: a 10x1 vector with a 1 at the digit's index.
        label_train[int(train_labels[i])]=1
        imgvector1=image2vector(img_train)
        imgvector=normalize_data(imgvector1)

        A2,cache=forward_propagation(imgvector,parameters)
        pre_label=softmax(A2)
        costl=costloss(A2,label_train,parameters)
        grads = back_propagation(parameters, cache, imgvector, label_train)
        parameters = update_para(parameters, grads, learning_rate = ttt)
        grads["dW1"]=0
        grads["dW2"]=0
        grads["db1"]=0
        grads["db2"]=0
        # if i%1000==0:
            # pass
        print("cost after iteration %i:"%(i))
        print(costl)
    # print("ii de value: ",ii/50000.)
    # print('parameters',parameters["W1"],parameters["W2"],parameters["b1"],parameters["b2"])      # plt.imshow(train_images[i], cmap='gray')
    # print("cost : ",costl)
       # plt.show()
    # Evaluate on the 10000 test images.
    for i in range(10000):
        img_test=test_images[i]
        vector_image=normalize_data(image2vector(img_test))
        label_testx=test_labels[i]
        aa2,_=forward_propagation(vector_image,parameters)
        predict_value=softmax(aa2)
        if predict_value==int(label_testx):
            ii=ii+1
    print(ii)

The test results are shown in the figure below. Training runs from step 0 to step 49999, 50,000 iterations in total.

The final 9119 is the number of the 10,000 test images that were predicted correctly. In this experiment, 9119 images were classified correctly, an accuracy of 91.19%.