1. 程式人生 > >detectron程式碼理解(三):RPN構建與相應的損失函式

detectron程式碼理解(三):RPN構建與相應的損失函式

1.RPN的構建

對RPN的構建在FPN.py的add_fpn_rpn_outputs函式中

def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    For every FPN level from finest (k_min) to coarsest (k_max) this attaches
    three ops to the model:
      * 'conv_rpn_fpn<lvl>'       -- 3x3 conv + ReLU hidden representation
      * 'rpn_cls_logits_fpn<lvl>' -- 1x1 conv, one objectness logit per anchor
      * 'rpn_bbox_pred_fpn<lvl>'  -- 1x1 conv, 4 box deltas per anchor
    Weights and biases are created once at the k_min level and shared by all
    other levels (ConvShared). When proposals are needed (inference, or
    Faster R-CNN training), a Sigmoid plus a GenerateProposals op is added
    per level, producing 'rpn_rois_fpn<lvl>' and 'rpn_roi_probs_fpn<lvl>'.

    Args:
        model: DetectionModelHelper to add ops to.
        blobs_in: FPN feature blobs in *reversed* level order (coarsest
            first), e.g. [fpn_res5_2_sum_subsampled_2x, fpn_res5_2_sum,
            fpn_res4_5_sum, fpn_res3_3_sum, fpn_res2_2_sum].
        dim_in: channel count of every FPN feature map (typically 256).
        spatial_scales: per-blob spatial scales, same reversed order.
    """
    # One anchor per aspect ratio at each spatial position; the anchor area
    # is fixed within a level and doubles at each coarser level.
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in  # FPN preserves the channel dimension

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid (e.g. 6)
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid (e.g. 2)
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation (3x3 conv + ReLU)
            conv_rpn_fpn = model.Conv(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores: one logit per anchor
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas: 4 values per anchor
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases: every level reuses the parameters
            # created for the k_min level.
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            # Sigmoid turns the per-anchor logits into objectness probabilities
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            # Proposals come out as 'rpn_rois_fpn<lvl>' with per-proposal
            # scores in 'rpn_roi_probs_fpn<lvl>'.
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )

對於每一層FPN

層數 | 語義轉換 | 邏輯分類(括號內為sigmoid啟用後的概率) | 迴歸層 | 輸出rois
P2 | conv_rpn_fpn2 | rpn_cls_logits_fpn2(rpn_cls_probs_fpn2) | rpn_bbox_pred_fpn2 | rpn_rois_fpn2, rpn_roi_probs_fpn2
P3 | conv_rpn_fpn3 | rpn_cls_logits_fpn3(rpn_cls_probs_fpn3) | rpn_bbox_pred_fpn3 | rpn_rois_fpn3, rpn_roi_probs_fpn3
P4 | conv_rpn_fpn4 | rpn_cls_logits_fpn4(rpn_cls_probs_fpn4) | rpn_bbox_pred_fpn4 | rpn_rois_fpn4, rpn_roi_probs_fpn4
P5 | conv_rpn_fpn5 | rpn_cls_logits_fpn5(rpn_cls_probs_fpn5) | rpn_bbox_pred_fpn5 | rpn_rois_fpn5, rpn_roi_probs_fpn5
P6 | conv_rpn_fpn6 | rpn_cls_logits_fpn6(rpn_cls_probs_fpn6) | rpn_bbox_pred_fpn6 | rpn_rois_fpn6, rpn_roi_probs_fpn6

其中sigmoid啟用後的概率(rpn_cls_probs_fpnX)與迴歸層輸出(rpn_bbox_pred_fpnX)是產生proposal(對應函式為model.GenerateProposals)的輸入,rpn_rois_fpnX與rpn_roi_probs_fpnX是產生proposal的輸出。

完成的內容是:

  • 從約20000個anchors中選取概率較大的 12000 個 anchor

  • 利用迴歸的位置引數,修正這 12000 個 anchor 的位置,得到 RoIs

  • 利用非極大值抑制(Non-maximum suppression, NMS),選出概率最大的 2000 個 RoIs

2.為RPN構建損失

def add_fpn_rpn_losses(model):
    """Add RPN on FPN specific losses.

    For each FPN level this narrows the full-sized RPN label/target blobs to
    the level's feature-map shape, then adds one sigmoid cross-entropy
    classification loss and one smooth-L1 bbox regression loss.

    Returns:
        dict mapping loss output blobs to their gradient blobs.
    """
    loss_gradients = {}
    min_level = cfg.FPN.RPN_MIN_LEVEL
    max_level = cfg.FPN.RPN_MAX_LEVEL
    for lvl in range(min_level, max_level + 1):
        suffix = 'fpn' + str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the
        # feature map shape at this level.
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_' + suffix, 'rpn_cls_logits_' + suffix],
            'rpn_labels_int32_' + suffix
        )
        for key in ('targets', 'inside_weights', 'outside_weights'):
            model.net.SpatialNarrowAs(
                [
                    'rpn_bbox_' + key + '_wide_' + suffix,
                    'rpn_bbox_pred_' + suffix
                ],
                'rpn_bbox_' + key + '_' + suffix
            )
        # Per-anchor objectness loss, normalized by the per-image RPN batch
        # size and the number of images per batch.
        cls_loss = model.net.SigmoidCrossEntropyLoss(
            ['rpn_cls_logits_' + suffix, 'rpn_labels_int32_' + suffix],
            'loss_rpn_cls_' + suffix,
            normalize=0,
            scale=(
                model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
                cfg.TRAIN.IMS_PER_BATCH
            )
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        bbox_loss = model.net.SmoothL1Loss(
            [
                'rpn_bbox_pred_' + suffix, 'rpn_bbox_targets_' + suffix,
                'rpn_bbox_inside_weights_' + suffix,
                'rpn_bbox_outside_weights_' + suffix
            ],
            'loss_rpn_bbox_' + suffix,
            beta=1. / 9.,
            scale=model.GetLossScale(),
        )
        loss_gradients.update(
            blob_utils.get_loss_gradients(model, [cls_loss, bbox_loss])
        )
        model.AddLosses(['loss_rpn_cls_' + suffix, 'loss_rpn_bbox_' + suffix])
    return loss_gradients

以P2層為例:

[u'rpn_labels_int32_wide_fpn2', u'rpn_cls_logits_fpn2']  ——> rpn_labels_int32_fpn2
[u'rpn_bbox_targets_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_targets_fpn2
[u'rpn_bbox_inside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_inside_weights_fpn2
[u'rpn_bbox_outside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_outside_weights_fpn2
[u'rpn_cls_logits_fpn2', u'rpn_labels_int32_fpn2'] ——> loss_rpn_cls_fpn2   #RPN的分類損失
[u'rpn_bbox_pred_fpn2', u'rpn_bbox_targets_fpn2', u'rpn_bbox_inside_weights_fpn2', u'rpn_bbox_outside_weights_fpn2'] ——> loss_rpn_bbox_fpn2  #RPN的邊框損失