1. 程式人生 > >機器學習-周志華-課後習題答案5.5

機器學習-周志華-課後習題答案5.5

5.5 試程式設計實現標準BP演算法和累積BP演算法,在西瓜資料集3.0上分別用這兩個演算法訓練一個單隱層網路,並進行比較。

通過程式設計實踐發現,在本例中要達到同一限定的均方誤差時,標準BP演算法比累積BP(Accumulated BP,以下簡稱ABP)演算法收斂明顯更快;特別是將ABP演算法的誤差門檻設定為0.01時,其所需的權重更新次數十分龐大。

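本人採用標準BP演算法(隱層10個神經元),以誤差小於0.01時得到的各項權重算得錯誤率為2/17,共訓練291輪;由於標準BP每處理一個樣本就更新一次權重,相當於更新權重 291×17=4947 次。相應地,用ABP演算法(隱層10個神經元),以誤差小於0.2時得到的權重係數算得錯誤率同為2/17,共訓練1884輪,每輪只更新一次,即更新權重1884次。由此可見,雖然ABP需要更多的訓練輪數,但它在寬鬆得多的誤差門檻(0.2 對 0.01)下即可達到與標準BP相同的錯誤率,可見在同等誤差條件下其分類精度並不遜於(甚至可能高於)標準BP演算法。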

下面附上程式碼:

# -*- coding: utf-8 -*-
# STANDARD BP-NN & ACCUMULATED BP-NN
import numpy as np

class Data(object):
    """Labelled data set plus trainers for a single-hidden-layer sigmoid net.

    ``data`` is a 2-D array whose last column holds the class label and whose
    remaining columns are the input features.  The training targets are built
    as ``[label, 1 - label]``, so the label column must be binary (0 / 1).

    Thresholds are folded into the weight matrices: every input vector and
    every hidden-layer output vector gets a leading constant ``-1``, which
    makes row 0 of ``v`` / ``w`` the hidden / output thresholds.
    """

    def __init__(self, data):
        self.data = np.array(data)
        self.rows = self.data.shape[0]
        self.cols = self.data.shape[1]  # includes the label column
        self.__eta = 0.1  # learning rate, initial value 0.1
        self.__in = self.cols - 1  # number of input neurons
        self.__out = len(np.unique(self.data[:, -1]))  # number of output neurons

    def set_eta(self, n):
        """Set the learning rate used by both trainers."""
        self.__eta = n

    def get_eta(self):
        """Return the current learning rate."""
        return self.__eta

    def get_in(self):
        """Return the number of input neurons (feature columns)."""
        return self.__in

    def get_out(self):
        """Return the number of output neurons (distinct label values)."""
        return self.__out

    @staticmethod
    def _sigmoid(x):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-x))

    def _init_network(self, q):
        """Build the augmented inputs, the targets and random initial weights.

        Returns ``(X, Y, v, w)`` as plain ndarrays: ``X`` is rows x (d+1) with
        a leading ``-1`` column, ``Y`` is rows x 2, ``v`` is (d+1) x q and
        ``w`` is (q+1) x l, both drawn uniformly from [0, 1).

        Raises:
            ValueError: if there are no samples or the labels are not binary.
        """
        if self.rows == 0:
            raise ValueError("cannot train on an empty data set")
        if self.__out != 2:
            # Targets are hard-coded as [label, 1 - label]; any other number
            # of classes can only end in a shape mismatch further down.
            raise ValueError(
                "expected exactly 2 distinct labels, got %d" % self.__out)
        samples = np.asarray(self.data, dtype=float)
        labels = samples[:, -1]
        X = np.insert(samples[:, :-1], 0, -1, axis=1)
        Y = np.column_stack((labels, 1 - labels))
        v = np.random.random((self.__in + 1, q))  # row 0 = hidden thresholds
        w = np.random.random((q + 1, self.__out))  # row 0 = output thresholds
        return X, Y, v, w

    def _sample_gradients(self, x, y, v, w):
        """Forward + backward pass for one sample.

        Returns ``(grad_v, grad_w, sample_err)`` where the gradients are the
        un-scaled update directions (multiply by the learning rate) and
        ``sample_err`` is ``0.5 * sum((y - output) ** 2)``.
        """
        hidden = self._sigmoid(np.dot(x, v))  # (q,)
        hidden_b = np.insert(hidden, 0, -1)  # (q+1,), leading -1 for the threshold
        output = self._sigmoid(np.dot(hidden_b, w))  # (l,)

        g = output * (1 - output) * (y - output)  # output-layer gradient term
        # The threshold row of w is skipped: it has no hidden neuron behind it.
        e = hidden * (1 - hidden) * np.dot(w[1:, :], g)  # hidden-layer term
        grad_w = np.outer(hidden_b, g)  # (q+1) x l
        grad_v = np.outer(x, e)  # (d+1) x q
        sample_err = 0.5 * np.sum((y - output) ** 2)
        return grad_v, grad_w, sample_err

    def BP_NN(self, q=10, err=0.1, max_iter=None):
        """Train with standard BP (weights updated after every sample).

        Args:
            q: number of hidden neurons.
            err: training stops once the mean per-sample squared error
                accumulated over an epoch is ``<= err``.  The mean over all
                samples is used (the same criterion as ``ABP_NN``) rather
                than the error of the last sample alone.
            max_iter: optional cap on the number of epochs; ``None`` means
                no cap, in which case the loop runs until ``err`` is reached.

        Returns:
            ``(v, w)`` as ``np.matrix`` objects of shape (d+1) x q and
            (q+1) x l.

        Side effects:
            Prints the number of epochs run as ``BP_round: <n>``.
        """
        X, Y, v, w = self._init_network(q)
        n = self.__eta
        gap = 1
        counter = 0
        while gap > err and (max_iter is None or counter < max_iter):
            counter += 1
            total = 0.0
            for x, y in zip(X, Y):
                grad_v, grad_w, sample_err = self._sample_gradients(x, y, v, w)
                # Both gradients were computed from the pre-update weights.
                w += n * grad_w
                v += n * grad_v
                total += sample_err
            gap = total / self.rows
        print('BP_round:', counter)
        return np.asmatrix(v), np.asmatrix(w)

    def ABP_NN(self, q=10, err=0.1, max_iter=None):
        """Train with accumulated BP (one averaged update per epoch).

        Args:
            q: number of hidden neurons.
            err: training stops once the mean per-sample squared error of an
                epoch (measured with that epoch's pre-update weights) is
                ``<= err``.
            max_iter: optional cap on the number of epochs; ``None`` means
                no cap, in which case the loop runs until ``err`` is reached.

        Returns:
            ``(v, w)`` as ``np.matrix`` objects of shape (d+1) x q and
            (q+1) x l.

        Side effects:
            Prints the number of epochs run as ``ABP_round: <n>``.
        """
        X, Y, v, w = self._init_network(q)
        n = self.__eta
        gap = 1
        counter = 0
        while gap > err and (max_iter is None or counter < max_iter):
            counter += 1
            sum_v = np.zeros_like(v)
            sum_w = np.zeros_like(w)
            total = 0.0
            for x, y in zip(X, Y):
                grad_v, grad_w, sample_err = self._sample_gradients(x, y, v, w)
                sum_v += grad_v
                sum_w += grad_w
                total += sample_err
            # Apply the gradient averaged over the whole data set.
            w += n * sum_w / self.rows
            v += n * sum_v / self.rows
            gap = total / self.rows
        print('ABP_round:', counter)
        return np.asmatrix(v), np.asmatrix(w)

def test_NN(a, v, w):
    """Run the trained network on every sample of ``a`` and print the outputs.

    Args:
        a: object exposing ``data`` (2-D array, last column = label) and
            ``rows`` (number of samples to evaluate).
        v: (d+1) x q input-to-hidden weights, row 0 being the thresholds.
        w: (q+1) x l hidden-to-output weights, row 0 being the thresholds.
            Both may be ``np.matrix`` or plain ndarrays.

    Returns:
        The rows x l array of network outputs (the same array that is
        printed).
    """
    v = np.asarray(v, dtype=float)
    w = np.asarray(w, dtype=float)
    features = np.asarray(a.data, dtype=float)[:a.rows, :-1]
    # A leading -1 on the inputs and on the hidden outputs lets row 0 of
    # v / w act as the threshold, matching how the weights were trained.
    X = np.insert(features, 0, -1, axis=1)
    hidden = 1 / (1 + np.exp(-np.dot(X, v)))  # rows x q
    hidden = np.insert(hidden, 0, -1, axis=1)  # rows x (q+1)
    y_cal = 1 / (1 + np.exp(-np.dot(hidden, w)))  # rows x l
    print(y_cal)
    return y_cal


# The name starts with "test_", so pytest would otherwise try to collect this
# helper as a test case and fail looking for fixtures named a, v and w.
test_NN.__test__ = False

# Watermelon data set 3.0 as a numeric table: 17 samples, one per row.
# The last column is the class label (1 / 0) that Data uses as the target;
# the other eight columns are the input features.
# NOTE(review): the first six columns look like integer-coded categorical
# attributes and the next two like continuous ones (density, sugar content)
# -- confirm against the textbook table.
D = np.array([
    [1, 1, 1, 1, 1, 1, 0.697, 0.460, 1],
    [2, 1, 2, 1, 1, 1, 0.774, 0.376, 1],
    [2, 1, 1, 1, 1, 1, 0.634, 0.264, 1],
    [1, 1, 2, 1, 1, 1, 0.608, 0.318, 1],
    [3, 1, 1, 1, 1, 1, 0.556, 0.215, 1],
    [1, 2, 1, 1, 2, 2, 0.403, 0.237, 1],
    [2, 2, 1, 2, 2, 2, 0.481, 0.149, 1],
    [2, 2, 1, 1, 2, 1, 0.437, 0.211, 1],
    [2, 2, 2, 2, 2, 1, 0.666, 0.091, 0],
    [1, 3, 3, 1, 3, 2, 0.243, 0.267, 0],
    [3, 3, 3, 3, 3, 1, 0.245, 0.057, 0],
    [3, 1, 1, 3, 3, 2, 0.343, 0.099, 0],
    [1, 2, 1, 2, 1, 1, 0.639, 0.161, 0],
    [3, 2, 2, 2, 1, 1, 0.657, 0.198, 0],
    [2, 2, 1, 1, 2, 2, 0.360, 0.370, 0],
    [3, 1, 1, 3, 3, 1, 0.593, 0.042, 0],
    [1, 1, 2, 2, 2, 1, 0.719, 0.103, 0]])
a = Data(D)
# Train accumulated BP first (error threshold 0.2), then standard BP
# (threshold 0.01).  Both draw their initial weights from NumPy's global
# random state, so the order of these two calls affects the weights each gets.
v,w = a.ABP_NN(err=0.2)
v1,w1 = a.BP_NN(err=0.01)

# Print the network outputs on the training samples: ABP weights first,
# then standard-BP weights.
test_NN(a,v,w)
test_NN(a,v1,w1)


執行結果:

ABP_round: 1884
BP_round: 291


[[ 0.52207288  0.45324987]      
 [ 0.52987926  0.44755556]
 [ 0.54584984  0.42441809]
 [ 0.4985367   0.48468109]
 [ 0.56875787  0.39464855]
 [ 0.52142392  0.47297261]
 [ 0.46626988  0.53539895]
 [ 0.50013411  0.49477303]
 [ 0.41035128  0.60548034]
 [ 0.42516587  0.59000489]
 [ 0.3507589   0.67957016]
 [ 0.40119524  0.61470023]
 [ 0.43723545  0.57121177]
 [ 0.46565608  0.532883  ]
 [ 0.54464163  0.43843949]
 [ 0.37772451  0.64457881]
 [ 0.40085134  0.61430352]]


[[ 0.84115947  0.13747515]
 [ 0.80969383  0.17228699]
 [ 0.86565802  0.11538309]
 [ 0.6917523   0.2886161 ]
 [ 0.8867624   0.09633574]
 [ 0.80368707  0.17604059]
 [ 0.4655449   0.52490606]
 [ 0.53996253  0.44998827]
 [ 0.07757502  0.9308757 ]
 [ 0.10231002  0.90658563]
 [ 0.03851867  0.96698173]
 [ 0.12009371  0.88737141]
 [ 0.16490322  0.8421109 ]
 [ 0.17730987  0.83332648]
 [ 0.84579538  0.13594652]
 [ 0.05885429  0.94756339]
 [ 0.10192718  0.90597301]]