1. 程式人生 > >Spark MLlib Deep Learning Deep Belief Network (深度學習-深度信念網路)2.2

Spark MLlib Deep Learning Deep Belief Network (深度學習-深度信念網路)2.2

Spark MLlib Deep Learning Deep Belief Network (深度學習-深度信念網路)2.2

第二章Deep Belief Network (深度信念網路)

2基礎及原始碼解析

2.1 Deep Belief Network深度信念網路基礎知識

1)綜合基礎知識參照:

2)原著資料參照:

《Learning Deep Architectures for AI》

《A Practical Guide to Training Restricted Boltzmann Machines》 

2.2 Deep Learning DBN原始碼解析

2.2.1 DBN程式碼結構

DBN原始碼主要包括:DBN、DBNModel兩個類,原始碼結構如下:

DBN結構:

DBNModel結構:

2.2.2 DBN訓練過程

2.2.3 DBN解析

(1) DBNweight

/**
 * Weight container for one RBM layer of the DBN.
 *
 * @param W  weight matrix (hidden x visible)
 * @param vW momentum (velocity) term for W
 * @param b  visible-unit bias vector
 * @param vb momentum term for b
 * @param c  hidden-unit bias vector
 * @param vc momentum term for c
 */
case class DBNweight(
  W: BDM[Double],
  vW: BDM[Double],
  b: BDM[Double],
  vb: BDM[Double],
  c: BDM[Double],
  vc: BDM[Double]) extends Serializable

DBNweight:自定義資料型別,儲存權重。

(2) DBNConfig

/**
 * Training configuration for the DBN.
 *
 * @param size     layer sizes of the network (units per layer)
 * @param layer    number of layers
 * @param momentum momentum factor for the weight updates
 * @param alpha    learning rate
 */
case class DBNConfig(
  size: Array[Int],
  layer: Int,
  momentum: Double,
  alpha: Double) extends Serializable

DBNConfig:定義引數配置,儲存配置資訊。引數說明:

size:神經網路結構

layer:神經網路層數

momentum: Momentum因子

alpha:學習迭代因子

(3) InitialWeight

初始化權重

/**
 * Initialize the weight matrix of every RBM layer to zero.
 * Mirrors the MATLAB reference:
 *   dbn.rbm{u}.W = zeros(dbn.sizes(u + 1), dbn.sizes(u));
 *
 * @param size layer sizes of the network
 * @return one zero matrix of shape (size(i), size(i-1)) per adjacent layer pair
 */
def InitialW(size: Array[Int]): Array[BDM[Double]] = {
  val n = size.length
  val rbm_W = ArrayBuffer[BDM[Double]]()
  for (i <- 1 to n - 1) {
    // W(i) maps layer i-1 (visible) to layer i (hidden)
    val d1 = BDM.zeros[Double](size(i), size(i - 1))
    rbm_W += d1
  }
  rbm_W.toArray
}

(4) InitialWeightV

初始化權重vW

/**
 * Initialize the weight-momentum matrix vW of every RBM layer to zero.
 * Mirrors the MATLAB reference:
 *   dbn.rbm{u}.vW = zeros(dbn.sizes(u + 1), dbn.sizes(u));
 *
 * @param size layer sizes of the network
 * @return one zero matrix of shape (size(i), size(i-1)) per adjacent layer pair
 */
def InitialvW(size: Array[Int]): Array[BDM[Double]] = {
  val n = size.length
  val rbm_vW = ArrayBuffer[BDM[Double]]()
  for (i <- 1 to n - 1) {
    // same shape as the corresponding W(i)
    val d1 = BDM.zeros[Double](size(i), size(i - 1))
    rbm_vW += d1
  }
  rbm_vW.toArray
}

(5) Initialb

初始化偏置向量

/**
 * Initialize the visible-unit bias vector b of every RBM layer to zero.
 * Mirrors the MATLAB reference:
 *   dbn.rbm{u}.b = zeros(dbn.sizes(u), 1);
 *
 * @param size layer sizes of the network
 * @return one zero column vector of length size(i-1) per layer pair
 */
def Initialb(size: Array[Int]): Array[BDM[Double]] = {
  val n = size.length
  val rbm_b = ArrayBuffer[BDM[Double]]()
  for (i <- 1 to n - 1) {
    // bias over the visible layer (layer i-1)
    val d1 = BDM.zeros[Double](size(i - 1), 1)
    rbm_b += d1
  }
  rbm_b.toArray
}

(6) Initialvb

初始化偏置向量

/**
 * Initialize the momentum term vb of the visible bias of every RBM layer to zero.
 * Mirrors the MATLAB reference:
 *   dbn.rbm{u}.vb = zeros(dbn.sizes(u), 1);
 *
 * @param size layer sizes of the network
 * @return one zero column vector of length size(i-1) per layer pair
 */
def Initialvb(size: Array[Int]): Array[BDM[Double]] = {
  val n = size.length
  val rbm_vb = ArrayBuffer[BDM[Double]]()
  for (i <- 1 to n - 1) {
    // same shape as the corresponding b(i)
    val d1 = BDM.zeros[Double](size(i - 1), 1)
    rbm_vb += d1
  }
  rbm_vb.toArray
}

(7) Initialc

初始化偏置向量

/**
 * Initialize the hidden-unit bias vector c of every RBM layer to zero.
 * Mirrors the MATLAB reference:
 *   dbn.rbm{u}.c = zeros(dbn.sizes(u + 1), 1);
 *
 * @param size layer sizes of the network
 * @return one zero column vector of length size(i) per layer pair
 */
def Initialc(size: Array[Int]): Array[BDM[Double]] = {
  val n = size.length
  val rbm_c = ArrayBuffer[BDM[Double]]()
  for (i <- 1 to n - 1) {
    // bias over the hidden layer (layer i)
    val d1 = BDM.zeros[Double](size(i), 1)
    rbm_c += d1
  }
  rbm_c.toArray
}

(8) Initialvc

初始化偏置向量

/**
 * Initialize the momentum term vc of the hidden bias of every RBM layer to zero.
 * Mirrors the MATLAB reference:
 *   dbn.rbm{u}.vc = zeros(dbn.sizes(u + 1), 1);
 *
 * @param size layer sizes of the network
 * @return one zero column vector of length size(i) per layer pair
 */
def Initialvc(size: Array[Int]): Array[BDM[Double]] = {
  val n = size.length
  val rbm_vc = ArrayBuffer[BDM[Double]]()
  for (i <- 1 to n - 1) {
    // same shape as the corresponding c(i)
    val d1 = BDM.zeros[Double](size(i), 1)
    rbm_vc += d1
  }
  rbm_vc.toArray
}

(8) sigmrnd

Gibbs取樣

/**
 * Gibbs sampling of binary units from a sigmoid activation.
 * Mirrors the MATLAB reference:
 *   X = double(1./(1+exp(-P)) > rand(size(P)));
 *
 * @param P pre-activation matrix
 * @return matrix of 0.0/1.0 samples: 1.0 where sigmoid(P) exceeds a uniform draw
 */
def sigmrnd(P: BDM[Double]): BDM[Double] = {
  // element-wise sigmoid: 1 / (1 + exp(-P))
  val s1 = 1.0 / (Bexp(P * (-1.0)) + 1.0)
  // uniform random matrix of the same shape
  val r1 = BDM.rand[Double](s1.rows, s1.cols)
  // threshold the activation against the random draw
  val a1 = s1 :> r1
  val a2 = a1.data.map { f => if (f == true) 1.0 else 0.0 }
  val a3 = new BDM(s1.rows, s1.cols, a2)
  a3
}

/**
 * Noisy sigmoid activation: sigmoid(P) plus a random perturbation.
 * MATLAB reference: X = double(1./(1+exp(-P)))+1*randn(size(P));
 *
 * NOTE(review): the reference formula uses randn (Gaussian noise) but this
 * implementation uses BDM.rand (uniform [0,1) noise) — confirm which is intended.
 *
 * @param P pre-activation matrix
 * @return sigmoid(P) with additive noise, same shape as P
 */
def sigmrnd2(P: BDM[Double]): BDM[Double] = {
  // element-wise sigmoid: 1 / (1 + exp(-P))
  val s1 = 1.0 / (Bexp(P * (-1.0)) + 1.0)
  val r1 = BDM.rand[Double](s1.rows, s1.cols)
  val a3 = s1 + (r1 * 1.0)
  a3
}

(9) DBNtrain

對神經網路每一層進行訓練。

/**
 * Deep Belief Network training entry point.
 * Trains the network greedily, one RBM layer at a time: layer 1 is trained on
 * the raw input, each subsequent layer on the sigmoid forward pass of the data.
 *
 * Uses the enclosing class's fields size/layer/momentum/alpha for configuration.
 *
 * @param train_d RDD of (label, feature-matrix) training pairs
 * @param opts    training options: opts(0)=batch size, opts(1)=number of epochs
 * @return the trained DBNModel (config plus per-layer W, b, c)
 */
def DBNtrain(train_d: RDD[(BDM[Double], BDM[Double])], opts: Array[Double]): DBNModel = {
  // Spark context, used below to broadcast per-layer parameters
  val sc = train_d.sparkContext
  val dbnconfig = DBNConfig(size, layer, momentum, alpha)
  // Initialize all layer parameters (weights, biases, and their momenta) to zero
  var dbn_W = DBN.InitialW(size)
  var dbn_vW = DBN.InitialvW(size)
  var dbn_b = DBN.Initialb(size)
  var dbn_vb = DBN.Initialvb(size)
  var dbn_c = DBN.Initialc(size)
  var dbn_vc = DBN.Initialvc(size)
  // Train layer 1 directly on the raw input
  printf("Training Level: %d.\n", 1)
  val weight0 = new DBNweight(dbn_W(0), dbn_vW(0), dbn_b(0), dbn_vb(0), dbn_c(0), dbn_vc(0))
  val weight1 = RBMtrain(train_d, opts, dbnconfig, weight0)
  dbn_W(0) = weight1.W
  dbn_vW(0) = weight1.vW
  dbn_b(0) = weight1.b
  dbn_vb(0) = weight1.vb
  dbn_c(0) = weight1.c
  dbn_vc(0) = weight1.vc
  // Train layers 2 .. n
  for (i <- 2 to dbnconfig.layer - 1) {
    // Forward pass: x = sigm(repmat(rbm.c', size(x, 1), 1) + x * rbm.W')
    printf("Training Level: %d.\n", i)
    val tmp_bc_w = sc.broadcast(dbn_W(i - 2))
    val tmp_bc_c = sc.broadcast(dbn_c(i - 2))
    // NOTE(review): this maps the RAW train_d through only the previous layer's
    // weights; for i > 2 the data is not propagated through all earlier layers —
    // confirm this matches the intended greedy layer-wise scheme.
    val train_d2 = train_d.map { f =>
      val lable = f._1
      val x = f._2
      val x2 = DBN.sigm(x * tmp_bc_w.value.t + tmp_bc_c.value.t)
      (lable, x2)
    }
    // Train layer i as an RBM on the propagated data
    val weighti = new DBNweight(dbn_W(i - 1), dbn_vW(i - 1), dbn_b(i - 1), dbn_vb(i - 1), dbn_c(i - 1), dbn_vc(i - 1))
    val weight2 = RBMtrain(train_d2, opts, dbnconfig, weighti)
    dbn_W(i - 1) = weight2.W
    dbn_vW(i - 1) = weight2.vW
    dbn_b(i - 1) = weight2.b
    dbn_vb(i - 1) = weight2.vb
    dbn_c(i - 1) = weight2.c
    dbn_vc(i - 1) = weight2.vc
  }
  // Package the trained parameters (momenta are training-only state)
  new DBNModel(dbnconfig, dbn_W, dbn_b, dbn_c)
}

(10) RBMtrain

神經網路訓練執行程式碼。

/**

* 深度信念網路(Deep Belief Network

* 每一層神經網路進行訓練rbmtrain

*/

def RBMtrain(train_t: RDD[(BDM[Double], BDM[Double])],

opts: Array[Double],

dbnconfig: DBNConfig,

weight: DBNweight): DBNweight = {

valsc = train_t.sparkContext

varStartTime = System.currentTimeMillis()

varEndTime = System.currentTimeMillis()

// 權重引數變數

varrbm_W = weight.W

varrbm_vW = weight.vW

varrbm_b = weight.b

varrbm_vb = weight.vb

varrbm_c = weight.c

varrbm_vc = weight.vc

// 廣播引數

valbc_config =sc.broadcast(dbnconfig)

// 訓練樣本數量

valm = train_t.count

// 計算batch的數量

valbatchsize = opts(0).toInt

valnumepochs = opts(1).toInt

valnumbatches = (m /batchsize).toInt

// numepochs是迴圈的次數

for (i <-1 tonumepochs) {

StartTime = System.currentTimeMillis()

valsplitW2 = Array.fill(numbatches)(1.0 / numbatches)

varerr =0.0

// 根據分組權重,隨機劃分每組樣本資料

for (l <-1 tonumbatches) {

// 1 廣播權重引數

valbc_rbm_W =sc.broadcast(rbm_W)

valbc_rbm_vW =sc.broadcast(rbm_vW)

valbc_rbm_b =sc.broadcast(rbm_b)

valbc_rbm_vb =sc.broadcast(rbm_vb)

valbc_rbm_c =sc.broadcast(rbm_c)

valbc_rbm_vc =sc.broadcast(rbm_vc)

// 2 樣本劃分

valtrain_split2 = train_t.randomSplit(splitW2, System.nanoTime())

valbatch_xy1 =train_split2(l -1)

// 3 前向計算

// v1 = batch;

// h1 = sigmrnd(repmat(rbm.c', opts.batchsize, 1) + v1 * rbm.W');

// v2 = sigmrnd(repmat(rbm.b', opts.batchsize, 1) + h1 * rbm.W);

// h2 = sigm(repmat(rbm.c', opts.batchsize, 1) + v2 * rbm.W');

// c1 = h1' * v1;

// c2 = h2' * v2;

valbatch_vh1 =batch_xy1.map { f =>

vallable = f._1

valv1 = f._2

valh1 = DBN.sigmrnd((v1 *bc_rbm_W.value.t +bc_rbm_c.value.t))

valv2 = DBN.sigmrnd((h1 *bc_rbm_W.value +bc_rbm_b.value.t))

    valh2 = DBN.sigm(v2 *bc_rbm_W.value.t +bc_rbm_c.value.t)

valc1 =h1.t *v1

valc2 =h2.t *v2

(lable, v1,h1,v2,h2,c1,c2)

}

// 4 更新前向計算

// rbm.vW = rbm.momentum * rbm.vW + rbm.alpha * (c1 - c2