
YOLOv2 Source Code Analysis (5)

Full series: YOLOv2 Source Code Analysis

0x01 make_convolutional_layer

We finally return to the make_convolutional_layer function.

//make_convolutional_layer
    if(binary){
        l.binary_weights = calloc(l.nweights, sizeof(float));
        l.cweights = calloc(l.nweights, sizeof(char));
        l.scales = calloc(n, sizeof(float));
    }
    if(xnor){
        l.binary_weights = calloc(l.nweights, sizeof(float));
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
    }
    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.mean_delta = calloc(n, sizeof(float));
        l.variance_delta = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
        l.x = calloc(l.batch*l.outputs, sizeof(float));
        l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
    }
...
    fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);

    return l;
}

If you have read the previous posts in this series, the meaning of the parameters here should already be clear. The only things worth a separate discussion are CUDA-related, and I will leave them for the last post of this series; have a look there if you are interested.
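
One extra note on the batch_normalize branch above: the buffers map directly onto the standard batch-normalization formula (this is textbook batch norm, nothing specific to this code). Each of the n filters keeps a batch mean μ and variance σ² (mean, variance), running averages of them for inference (rolling_mean, rolling_variance), a learnable scale γ (scales, initialized to 1) and the gradients of the statistics (mean_delta, variance_delta); x and x_norm cache the batch*outputs activations before and after normalization for the backward pass:

    \hat{x} = (x - \mu) / \sqrt{\sigma^2 + \epsilon}, \qquad y = \gamma \hat{x} + \beta

(In darknet the role of β is effectively played by the layer's ordinary biases.)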

With that, we are finally done with the make_convolutional_layer function.

0x02 parse_convolutional

You can go back to section 0x0103 in part (2).

    //parse_convolutional
    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam);
    layer.flipped = option_find_int_quiet(options, "flipped", 0);
    layer.dot = option_find_float_quiet(options, "dot", 0);

    return layer;
}
  

There is nothing more worth discussing here, so let's go back to parse_network_cfg.

0x03 parse_network_cfg

After quite a while we are back here again; see section 0x0102 in part (2).

        //parse_network_cfg
        if(lt == CONVOLUTIONAL){
            l = parse_convolutional(options, params);
        }else if(lt == DECONVOLUTIONAL){
            l = parse_deconvolutional(options, params);
        }
  

Let's look at this parse_deconvolutional function.

0x0301 parse_deconvolutional

layer parse_deconvolutional(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int pad = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    if(pad) padding = size/2;

    layer l = make_deconvolutional_layer(batch,h,w,c,n,size,stride,padding, activation, batch_normalize, params.net->adam);

    return l;
}
  

I have already explained these options in earlier posts, so I won't repeat them here. Let's go straight to the key function, make_deconvolutional_layer.

layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam)
{
    int i;
    layer l = {0};
    l.type = DECONVOLUTIONAL;

...

    l.forward = forward_deconvolutional_layer;
    l.backward = backward_deconvolutional_layer;
    l.update = update_deconvolutional_layer;
  

I will also skip the parameter setup here and look directly at the three key function pointers, starting with forward_deconvolutional_layer.

0x030101 forward_deconvolutional_layer

void forward_deconvolutional_layer(const layer l, network net)
{
    int i;

    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;
        float *b = net.input + i*l.c*l.h*l.w;
        float *c = net.workspace;

        gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n);

        col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs);
    }
    if (l.batch_normalize) {
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h);
    }
    activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation);
}
  

All of the helper functions here were analyzed in earlier posts, so I will focus on the logic of this function.

Comparing it with the convolutional layer we saw earlier, there are two differences:

  • A is transposed (the gemm_cpu call is made with TA = 1)
  • col2im_cpu is called after the gemm, instead of im2col_cpu before it

What do these two points tell us? A deconvolution really is a kind of convolution, just a transposed one.
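
Written out per batch item, and ignoring bias and activation, the two forward passes summarized from the gemm/im2col calls are (W is the weight matrix, X one input image):

    \text{conv:}\quad Y = W \cdot \mathrm{im2col}(X) \qquad\qquad \text{deconv:}\quad Y = \mathrm{col2im}(W^{\top} \cdot X)

So the deconvolutional layer multiplies by the transposed weight matrix where the convolutional layer multiplies by W, which is exactly what "transposed convolution" means.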

0x030102 backward_deconvolutional_layer

void backward_deconvolutional_layer(layer l, network net)
{
    int i;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
    }

    //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));

    for(i = 0; i < l.batch; ++i){
        int m = l.c;
        int n = l.size*l.size*l.n;
        int k = l.h*l.w;

        float *a = net.input + i*m*k;
        float *b = net.workspace;
        float *c = l.weight_updates;

        im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w, 
                l.size, l.stride, l.pad, b);
        gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n);

        if(net.delta){
            int m = l.c;
            int n = l.h*l.w;
            int k = l.size*l.size*l.n;

            float *a = l.weights;
            float *b = net.workspace;
            float *c = net.delta + i*n*m;

            gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
}
  

Understanding this function is not much different from the earlier backward_convolutional_layer; the changes are small.
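
For completeness, the two gemm_cpu calls above compute, per batch item (this is just a summary of the code in my own notation, with δ being the incoming l.delta):

    \frac{\partial L}{\partial W} \mathrel{+}= X \cdot \mathrm{im2col}(\delta)^{\top}, \qquad \frac{\partial L}{\partial X} \mathrel{+}= W \cdot \mathrm{im2col}(\delta)

which are the transposed counterparts of the convolutional layer's weight and input gradients.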

0x030103 update_deconvolutional_layer

void update_deconvolutional_layer(layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    int size = l.size*l.size*l.c*l.n;
    axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    if(l.scales){
        axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, momentum, l.scale_updates, 1);
    }

    axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(size, momentum, l.weight_updates, 1);
}
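
Writing out the axpy_cpu/scal_cpu calls above (η is the learning rate, μ the momentum, λ the decay, B the batch size), the weight update is plain SGD with momentum and weight decay:

    \Delta W \leftarrow \Delta W - \lambda B\,W, \qquad W \leftarrow W + \frac{\eta}{B}\,\Delta W, \qquad \Delta W \leftarrow \mu\,\Delta W

The biases and the batch-norm scales follow the same rule without the decay term.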
  

So, as with update_convolutional_layer, this function does nothing more than apply the parameter update. Let's now return to the make_deconvolutional_layer function.

//make_deconvolutional_layer
    l.batch_normalize = batch_normalize;

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }
...
  

These are all parameter settings that I covered in earlier posts, so I won't repeat them.

Good, that wraps up the parse_deconvolutional function.

0x0302 parse_local

We go back to the parse_network_cfg function:

        else if(lt == LOCAL){
            l = parse_local(options, params);
  

Let's look at the parse_local function.

local_layer parse_local(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);
    int pad = option_find_int(options, "pad",0);
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before local layer must output image.");

    local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);

    return layer;
}
  

Again, the options above were explained in earlier posts, so let's go straight to the key function, make_local_layer.

local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{
    ...
    l.forward = forward_local_layer;
    l.backward = backward_local_layer;
    l.update = update_local_layer;

    ...
    fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
  

As before, we skip the parameter setup and look directly at the three key functions, starting with forward_local_layer.

0x030201 forward_local_layer

void forward_local_layer(const local_layer l, network net)
{
    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int i, j;
    int locations = out_h * out_w;

    for(i = 0; i < l.batch; ++i){
        copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, net.workspace);
        float *output = l.output + i*l.outputs;
        for(j = 0; j < locations; ++j){
            float *a = l.weights + j*l.size*l.size*l.c*l.n;
            float *b = net.workspace + j;
            float *c = output + j;

            int m = l.n;
            int n = 1;
            int k = l.size*l.size*l.c;

            gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
        }
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
  

Let's first look at the two helper functions at the top:

int local_out_height(local_layer l)
{
    int h = l.h;
    if (!l.pad) h -= l.size;
    else h -= 1;
    return h/l.stride + 1;
}

int local_out_width(local_layer l)
{
    int w = l.w;
    if (!l.pad) w -= l.size;
    else w -= 1;
    return w/l.stride + 1;
}
  

These two functions again compute the height and width of the output image. Compare them with the formula used by the convolutional layer:

(l.h + 2*l.pad - l.size) / l.stride + 1
  

Unlike the convolutional layer, the formula here never adds 2*l.pad: pad is treated purely as an on/off flag (subtract l.size when padding is off, subtract 1 when it is on), which matches the convolutional formula with padding = size/2 only for odd kernel sizes.
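
To make that flag-style handling concrete, here is a tiny standalone check (the numbers are made up, this is not darknet code): with the pad flag on, the two formulas agree for an odd kernel and differ by one for an even kernel.

#include <stdio.h>

/* Sample values only; compares the two output-size formulas. */
static int conv_out(int h, int size, int stride, int padding)
{
    return (h + 2*padding - size) / stride + 1;   /* convolutional layer */
}

static int local_out(int h, int size, int stride, int pad)
{
    return (pad ? h - 1 : h - size) / stride + 1; /* local layer: pad is only a flag */
}

int main(void)
{
    int h = 13, stride = 1;
    /* with the pad flag set, the conv layer would use padding = size/2 */
    printf("size=3: conv=%d local=%d\n", conv_out(h, 3, stride, 3/2), local_out(h, 3, stride, 1));
    printf("size=4: conv=%d local=%d\n", conv_out(h, 4, stride, 4/2), local_out(h, 4, stride, 1));
    return 0;
}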

Comparing this with the convolutional layer from before, the difference is that the gemm is now done once per output location j: a points at that location's own block of weights and b at the j-th column of the im2col matrix. This is exactly the principle of the local layer: it is a convolution whose weights are not shared across spatial locations (this kind of locally connected layer appeared in some early networks of the AlexNet/GoogLeNet era).

What I'm saying here may be a bit abstract ¬_¬; I'll draw a diagram when I find the time. For now, the small sketch below may help.
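
A minimal sketch of what "weights are not shared" costs in memory (made-up layer sizes, purely for illustration): the local layer allocates one complete filter bank per output location, which is exactly the size = l.size*l.size*l.c*l.n*locations used by update_local_layer further down.

#include <stdio.h>

/* Made-up layer dimensions, only to show the buffer-size difference. */
int main(void)
{
    int c = 3, n = 16, size = 3;     /* input channels, filters, kernel size */
    int out_h = 13, out_w = 13;      /* output resolution */
    int locations = out_h * out_w;

    long conv_weights  = (long)size*size*c*n;        /* one shared filter bank */
    long local_weights = conv_weights * locations;   /* one filter bank per output location */

    printf("conv  weights: %ld floats\n", conv_weights);
    printf("local weights: %ld floats (%d banks)\n", local_weights, locations);
    return 0;
}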

0x030202 backward_local_layer

void backward_local_layer(local_layer l, network net)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, net.workspace);

        for(j = 0; j < locations; ++j){ 
            float *a = l.delta + i*l.outputs + j;
            float *b = net.workspace + j;
            float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(net.delta){
            for(j = 0; j < locations; ++j){ 
                float *a = l.weights + j*l.size*l.size*l.c*l.n;
                float *b = l.delta + i*l.outputs + j;
                float *c = net.workspace + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_cpu(net.workspace, l.c,  l.h,  l.w,  l.size,  l.stride, l.pad, net.delta+i*l.c*l.h*l.w);
        }
    }
}
  

The difference from the earlier backward_convolutional_layer is again the per-location indexing: l.delta, the im2col workspace and l.weight_updates are all offset by the output location j, so each location accumulates gradients into its own block of weights.
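
Per output location j the two inner gemm calls reduce to an outer product and a matrix-vector product against that location's own weight block W_j (my notation, just summarizing the code above):

    \frac{\partial L}{\partial W_j} \mathrel{+}= \delta_{:,j}\,\mathrm{im2col}(X)_{:,j}^{\top}, \qquad \mathrm{im2col}(\partial X)_{:,j} = W_j^{\top}\,\delta_{:,j}

col2im_cpu then scatters the columns of the second result back into net.delta.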

0x030203 update_local_layer

void update_local_layer(local_layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;
    axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);

    axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(size, momentum, l.weight_updates, 1);
}
  

There is not much to say about this function: it is the same SGD update as update_deconvolutional_layer, except that size now counts one filter bank per output location (l.size*l.size*l.c*l.n*locations) and there is no batch-norm scale update.

That completes the analysis of parse_local. In the next part we will return to the parse_network_cfg function.

My knowledge is limited, so if there is anything wrong in this post, please point it out. Thank you ^_^!

Full series: YOLOv2 Source Code Analysis