
[Machine Learning] BP Algorithm Study Notes

BP is short for Back Propagation, i.e. propagating backwards. As the name suggests, what exactly gets propagated backwards? A single animated figure can make this clear.


How this network works should now be clear. First, a set of inputs x1, x2, …, xm arrives at the input layer. Weighted by the connections to the hidden layer, they produce the values s1, s2, …, sn that serve as the hidden layer's inputs: sj is the weighted sum arriving at the j-th hidden node. Each sj then passes through the node's activation function f(·) and becomes that node's output f(sj). These outputs, weighted in turn by the hidden-to-output connections, form the inputs of the output layer, which processes them in exactly the same way as the hidden layer and finally produces the outputs ŷj, where j indexes the j-th output node. This is only the forward-propagation pass.
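A minimal NumPy sketch of this forward pass (the names v, w, theta0, theta1 anticipate the full implementation later in this post; the layer sizes are made up for illustration):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

m, n = 3, 4                          # toy sizes: m inputs, n hidden nodes
rng = np.random.default_rng(0)
x = rng.random(m)                    # one input sample x1..xm
v = rng.random((m, n))               # input-to-hidden weights
w = rng.random((n, 1))               # hidden-to-output weights
theta0 = rng.random(n)               # hidden-layer thresholds
theta1 = rng.random(1)               # output-layer thresholds

s = x @ v                            # s1..sn: weighted sums entering the hidden layer
b = sigmoid(s - theta0)              # hidden outputs f(sj - theta0_j)
y_hat = sigmoid(b @ w - theta1)      # the output layer repeats the same computation
print(y_hat)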

Before going further, a word about the hidden layer. As you can see, it sits between the input and output layers, but what is it really?

It is the feature space: the number of hidden nodes is the dimension of that space, i.e. how many features describe the data. The input-to-hidden connection weights project the raw input data into this feature space; sj, for example, measures the size of the data's projection along the j-th feature direction, in other words how much of feature j the data contain. The hidden-to-output connection weights then express how these features influence the outputs: if some feature strongly affects a particular output, the weight connecting them will be large. That is all we will say about the meaning of the hidden layer; with multiple hidden layers, you can think of them as features of features.

The detailed derivation can be found in this article:

http://blog.csdn.net/zhongkejingwang/article/details/44514073

and in this cnblogs post: http://www.cnblogs.com/21207-iHome/p/5227868.html

The BP workflow:


Schematic of a BP neuron:


Main functions of a neuron:

  1. Compute on the input data and produce an output.
  2. Update its connection weights.
  3. Feed the weight corrections back to the previous layer, implementing the feedback step (a rough sketch follows below).
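A hypothetical sketch of these three responsibilities as a single class (illustrative only; the full implementation below keeps all parameters inside one BP class instead):

import numpy as np

class Neuron:
    """Hypothetical sketch of the three responsibilities listed above."""
    def __init__(self, n_in, learn_rate=0.1):
        self.w = np.random.random(n_in)   # connection weights
        self.theta = np.random.random()   # activation threshold
        self.learn_rate = learn_rate

    def forward(self, x):
        # 1. compute on the input data and produce an output
        self.x = np.asarray(x, dtype=float)
        self.out = 1.0 / (1.0 + np.exp(-(self.w @ self.x - self.theta)))
        return self.out

    def backward(self, err):
        # local gradient through the sigmoid
        g = self.out * (1.0 - self.out) * err
        # 3. feedback for the previous layer: error routed back through the weights
        err_prev = g * self.w
        # 2. update this neuron's weights and threshold
        self.w += self.learn_rate * g * self.x
        self.theta -= self.learn_rate * g
        return err_prev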

Basic idea of a BP neural network

The learning process of a BP neural network consists of two phases: the forward propagation of information and the backward propagation of the error.

Forward pass: the data x in the model diagram flow from the input layer through to the final output layer z.

Backward propagation: during training, when the forward pass produces outputs that deviate from the expected values, the error is passed back from the output layer toward the input layer. On the way back, the weight w of every connection in every layer is adjusted so as to reduce the error.
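For the single-hidden-layer network implemented below (sigmoid activations, squared error, learning rate $\eta$), these weight corrections are the classical ones; $g_j$ and $e_h$ correspond exactly to the lists g and e computed in the Python code:

$$g_j = \hat{y}_j (1 - \hat{y}_j)(y_j - \hat{y}_j), \qquad e_h = b_h (1 - b_h) \sum_j w_{hj}\, g_j$$

$$\Delta w_{hj} = \eta\, g_j\, b_h, \quad \Delta\theta^{(1)}_j = -\eta\, g_j, \quad \Delta v_{ih} = \eta\, e_h\, x_i, \quad \Delta\theta^{(0)}_h = -\eta\, e_h$$

Here $b_h$ is the output of hidden node $h$, $v_{ih}$ and $w_{hj}$ are the input-to-hidden and hidden-to-output weights, and $\theta^{(0)}$, $\theta^{(1)}$ are the hidden and output thresholds (theta0 and theta1 in the code).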

BP neural network design

The design splits the neural network into three levels: neurons, network layers, and the network as a whole.

Network layer design

A network layer's code manages one layer, either a hidden layer or the output layer. (The input layer can use the input data directly and is not implemented separately.)

A network layer mainly manages the neurons of its own layer, so its encapsulated interface mirrors a neuron's interface while implementing the layer's own functionality externally.

For convenience, each layer also keeps a reference to the layer after it.
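A rough sketch of that structure (hypothetical Layer class reusing the Neuron sketch above; the actual code below flattens everything into one class):

class Layer:
    def __init__(self, neurons, next_layer=None):
        self.neurons = neurons        # the neurons this layer manages
        self.next_layer = next_layer  # reference to the following layer

    def forward(self, x):
        # same interface as a single neuron: inputs in, outputs out
        out = [n.forward(x) for n in self.neurons]
        return self.next_layer.forward(out) if self.next_layer else out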


Python implementation:

import numpy as np
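
# Training data: 17 samples with 8 attributes each (six discrete-valued
# attributes followed by two real-valued ones); y holds the 17 binary labels.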
x = np.mat( '2,3,3,2,1,2,3,3,3,2,1,1,2,1,3,1,2;\
            1,1,1,1,1,2,2,2,2,3,3,1,2,2,2,1,1;\
            2,3,2,3,2,2,2,2,3,1,1,2,2,3,2,2,3;\
            3,3,3,3,3,3,2,3,2,3,1,1,2,2,3,1,2;\
            1,1,1,1,1,2,2,2,2,3,3,3,1,1,2,3,2;\
            1,1,1,1,1,2,2,1,1,2,1,2,1,1,2,1,1;\
            0.697,0.774,0.634,0.668,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719;\
            0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103\
            ').T
x = np.array(x)
y = np.mat('1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0')
y = np.array(y).T
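# Alternative toy dataset (XOR), commented out; swap it in to sanity-check the network: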
'''
x = np.mat( '1,1,2,2;\
             1,2,1,2\
             ').T
x = np.array(x)
y=np.mat('0,1,1,0')
y = np.array(y).T
'''
xrow, xcol = x.shape
yrow, ycol = y.shape
print('x: ', x.shape, x)
print('y: ', y.shape, y)

class BP:
    def __init__(self, n_input, n_hidden_layer, n_output, learn_rate, error, n_max_train, value):
        self.n_input = n_input
        self.n_hidden_layer = n_hidden_layer
        self.n_output = n_output
        self.learn_rate = learn_rate
        self.error = error
        self.n_max_train = n_max_train

        self.v = np.random.random((self.n_input, self.n_hidden_layer))   # input-to-hidden weights
        self.w = np.random.random((self.n_hidden_layer, self.n_output))  # hidden-to-output weights
        self.theta0 = np.random.random(self.n_hidden_layer)              # hidden-layer thresholds
        self.theta1 = np.random.random(self.n_output)                    # output-layer thresholds
        self.b = []    # per-sample hidden-layer outputs
        self.yo = []   # per-sample network outputs
        self.x = 0
        self.y = 0
        self.lossAll = []
        self.lossAverage = 0
        self.nRight = 0
        self.value = value

    def printParam(self):
        print('printParam')
        print('---------------')
        print('     v: ', self.v)
        print('     w: ', self.w)
        print('theta0: ', self.theta0)
        print('theta1: ', self.theta1)
        print('---------------')

    def init(self, x, y):
        # cache the training set and allocate one row per sample for the
        # hidden-layer outputs (b) and the network outputs (yo)
        self.x = x
        self.y = y
        nx = len(x)
        self.b = [[0] * self.n_hidden_layer for _ in range(nx)]
        self.yo = [[0] * self.n_output for _ in range(nx)]

    def printResult(self):
        print('printResult')
        self.calculateLossAll()
        print('lossAll: ', self.lossAll)
        print('lossAverage: ', self.lossAverage)
        # a sample counts as correct only if every output lies on the same side
        # of its threshold value[j][0] as its label (value[j][1] below, value[j][2] above)
        self.nRight = 0
        for k in range(len(self.x)):
            print(self.y[k], '----', self.yo[k])
            self.nRight += 1
            for j in range(self.n_output):
                if self.yo[k][j] > self.value[j][0] and self.y[k][j] != self.value[j][2]:
                    self.nRight -= 1
                    break
                if self.yo[k][j] < self.value[j][0] and self.y[k][j] != self.value[j][1]:
                    self.nRight -= 1
                    break
        print('right rate: %d/%d' % (self.nRight, len(self.x)))

    def printProgress(self):
        print('yo: ', self.yo)

    def calculateLoss(self, y, yo):
        # squared error of one sample, summed over all output nodes
        loss = 0
        for j in range(self.n_output):
            loss += (y[j] - yo[j])**2
        return loss

    def calculateLossAll(self):
        self.lossAll = []
        for k in range(len(self.x)):
            loss = self.calculateLoss(self.y[k], self.yo[k])
            self.lossAll.append(loss)

        self.lossAverage = sum(self.lossAll) / len(self.x)

    def calculateOutput(self, x, k):
        # forward pass for sample k: fill in the hidden outputs b[k]
        # and the network outputs yo[k]
        for h in range(self.n_hidden_layer):
            tmp = 0
            for i in range(self.n_input):
                tmp += self.v[i][h] * x[i]
            self.b[k][h] = sigmoid(tmp - self.theta0[h])

        for j in range(self.n_output):
            tmp = 0
            for h in range(self.n_hidden_layer):
                tmp += self.w[h][j] * self.b[k][h]
            self.yo[k][j] = sigmoid(tmp - self.theta1[j])

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-1.0 * x))

class BPStandard(BP):
    '''
        Standard BP: update the parameters once after every single training example
    '''

    def updateParam(self, k):
        # output-layer gradient terms: g_j = yo_j * (1 - yo_j) * (y_j - yo_j)
        g = []
        for j in range(self.n_output):
            tmp = self.yo[k][j] * (1 - self.yo[k][j]) * (self.y[k][j] - self.yo[k][j])
            g.append(tmp)
        # hidden-layer gradient terms: e_h = b_h * (1 - b_h) * sum_j w_hj * g_j
        e = []
        for h in range(self.n_hidden_layer):
            tmp = 0
            for j in range(self.n_output):
                tmp += self.b[k][h] * (1.0 - self.b[k][h]) * self.w[h][j] * g[j]
            e.append(tmp)

        # apply the updates for this single sample
        for h in range(self.n_hidden_layer):
            for j in range(self.n_output):
                self.w[h][j] += self.learn_rate * g[j] * self.b[k][h]
        for j in range(self.n_output):
            self.theta1[j] -= self.learn_rate * g[j]
        for i in range(self.n_input):
            for h in range(self.n_hidden_layer):
                self.v[i][h] += self.learn_rate * e[h] * self.x[k][i]
        for h in range(self.n_hidden_layer):
            self.theta0[h] -= self.learn_rate * e[h]


    def train(self, x, y):
        print('train neural networks')
        self.init(x, y)
        self.printParam()
        loss1 = 0  # average loss of the previous step
        print('train begin:')
        n_train = 0
        nr = 0
        while 1:
            for k in range(len(x)):
                n_train += 1
                self.calculateOutput(x[k], k)
                self.calculateLossAll()
                loss = self.lossAverage
                if abs(loss1 - loss) < self.error:
                    nr += 1
                    if nr >= 100:  # stop only after the target is met 100 times in a row
                        break
                else:
                    nr = 0
                    self.updateParam(k)
                loss1 = loss  # track the previous loss so its change can be measured

                if n_train % 10000 == 0:
                    for kk in range(len(x)):  # kk: avoid shadowing the outer loop index
                        self.calculateOutput(x[kk], kk)
                    self.printProgress()

            if n_train > self.n_max_train or nr >= 100:
                break

        print('train end')
        self.printParam()
        self.printResult()
        print('train count: ', n_train)

class BPAll(BP):
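    '''
        Accumulated BP: compute the gradients over the whole training set,
        then update the parameters once per full pass
    '''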
    def updateParam(self):
        # accumulated BP: first compute the output-layer gradients g[k][j]
        # and hidden-layer gradients e[k][h] for every sample k ...
        g = []
        for k in range(len(self.x)):
            gk = []
            for j in range(self.n_output):
                tmp = self.yo[k][j] * (1 - self.yo[k][j]) * (self.y[k][j] - self.yo[k][j])
                gk.append(tmp)
            g.append(gk)

        e = []
        for k in range(len(self.x)):
            ek = []
            for h in range(self.n_hidden_layer):
                tmp = 0
                for j in range(self.n_output):
                    tmp += self.b[k][h] * (1.0 - self.b[k][h]) * self.w[h][j] * g[k][j]
                ek.append(tmp)
            e.append(ek)

        # ... then apply the summed updates to all weights and thresholds
        for h in range(self.n_hidden_layer):
            for j in range(self.n_output):
                for k in range(len(self.x)):
                    self.w[h][j] += self.learn_rate * g[k][j] * self.b[k][h]
        for j in range(self.n_output):
            for k in range(len(self.x)):
                self.theta1[j] -= self.learn_rate * g[k][j]

        for i in range(self.n_input):
            for h in range(self.n_hidden_layer):
                for k in range(len(self.x)):
                    self.v[i][h] += self.learn_rate * e[k][h] * self.x[k][i]
        for h in range(self.n_hidden_layer):
            for k in range(len(self.x)):
                self.theta0[h] -= self.learn_rate * e[k][h]



    def train(self, x, y):
        print('train neural networks')
        self.init(x, y)
        loss1 = 0  # average loss of the previous epoch
        print('train begin:')
        n_train = 0
        self.printParam()
        nr = 0
        while 1:
            n_train += 1

            for k in range(len(x)):
                self.calculateOutput(x[k], k)

            self.calculateLossAll()
            loss = self.lossAverage
            if abs(loss - loss1) < self.error:
                nr += 1
                if nr >= 100:  # stop only after the target is met 100 times in a row
                    break
            else:
                nr = 0
                self.updateParam()
            loss1 = loss  # track the previous loss so its change can be measured
            if n_train % 10000 == 0:
                self.printProgress()
            if n_train > self.n_max_train:  # guard against non-convergence
                break
        print('train end')
        self.printParam()
        self.printResult()
        print('train count: ', n_train)

if __name__ == '__main__':
    # Arguments: number of attributes, number of hidden neurons, number of outputs,
    # learning rate, error threshold, maximum number of training steps, and the
    # value spec for each output (used to compute the accuracy)
    n_input = xcol
    n_hidden_layer = 10
    n_output = ycol
    learn_rate = 0.1
    error = 0.005
    n_max_train = 1000000
    value = [[0.5, 0, 1]]  # per output: [threshold, label if below, label if above]

    bps = BPStandard(n_input, n_hidden_layer, n_output, learn_rate, error, n_max_train, value)
    bpa = BPAll(n_input, n_hidden_layer, n_output, learn_rate, error, n_max_train, value)
    bpa.train(x, y)
    #bps.train(x, y)
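
A note on the two variants: BPStandard updates the parameters after every single training example (the standard, per-sample form of BP), while BPAll accumulates the gradients over all samples and updates once per full pass (accumulated BP). With the same learning rate, the standard version performs many more, but cheaper, updates; which of the two converges faster depends on the data. For a quick sanity check, swap in the commented-out XOR dataset above; n_input and n_output then adapt automatically through xcol and ycol.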