detectron程式碼理解(三):RPN構建與相應的損失函式
1.RPN的構建
對RPN的構建在FPN.py的add_fpn_rpn_outputs函式中
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    blobs_in is ordered coarsest-to-finest, e.g.
    [fpn_res5_2_sum_subsampled_2x (P6), fpn_res5_2_sum (P5),
     fpn_res4_5_sum (P4), fpn_res3_3_sum (P3), fpn_res2_2_sum (P2)].
    For each FPN level lvl this adds three ops:
      - conv_rpn_fpn<lvl>: 3x3 conv hidden representation
      - rpn_cls_logits_fpn<lvl>: 1x1 conv objectness logits
      - rpn_bbox_pred_fpn<lvl>: 1x1 conv box regression deltas
    """
    # With FPN, each position of a level only emits RPN_ASPECT_RATIOS
    # anchors; the anchor size is fixed per level and grows with the level.
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in  # FPN features keep their channel count (e.g. 256)
    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid (e.g. 6)
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid (e.g. 2)
    assert len(blobs_in) == k_max - k_min + 1
    # Walk the levels from the finest (P2) upward, attaching RPN heads
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)
        if lvl == k_min:
            # Create conv ops with randomly initialized weights and zeroed
            # biases for the first FPN level; these will be shared by all
            # other FPN levels.
            # RPN hidden representation: 3x3 conv + ReLU
            conv_rpn_fpn = model.Conv(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores: one logit per anchor
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas: 4 values per anchor
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            print(conv_rpn_fpn, rpn_cls_logits_fpn, rpn_bbox_pred_fpn)
        else:
            # Share weights and biases: every level reuses the W/b that
            # were created for level k_min (P2)
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )
            print(conv_rpn_fpn, rpn_cls_logits_fpn, rpn_bbox_pred_fpn)
        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            # 1) inference (== not model.train) for RPN only and Faster R-CNN
            # OR
            # 2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            # Sigmoid turns the per-anchor logits into objectness probs
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            print(rpn_cls_probs_fpn, rpn_bbox_pred_fpn,
                  'rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl)
            # The proposals are emitted as rpn_rois_fpn<lvl>, with
            # per-proposal scores in rpn_roi_probs_fpn<lvl>
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )
對於每一層FPN
層數 | 語義轉換 | 邏輯分類(sigmoid啟用後) | 迴歸層 | 輸出rois | 輸出分數 |
P2 | conv_rpn_fpn2 | rpn_cls_logits_fpn2(rpn_cls_probs_fpn2) | rpn_bbox_pred_fpn2 | rpn_rois_fpn2 | rpn_roi_probs_fpn2 |
P3 | conv_rpn_fpn3 | rpn_cls_logits_fpn3(rpn_cls_probs_fpn3) | rpn_bbox_pred_fpn3 | rpn_rois_fpn3 | rpn_roi_probs_fpn3 |
P4 | conv_rpn_fpn4 | rpn_cls_logits_fpn4(rpn_cls_probs_fpn4) | rpn_bbox_pred_fpn4 | rpn_rois_fpn4 | rpn_roi_probs_fpn4 |
P5 | conv_rpn_fpn5 | rpn_cls_logits_fpn5(rpn_cls_probs_fpn5) | rpn_bbox_pred_fpn5 | rpn_rois_fpn5 | rpn_roi_probs_fpn5 |
P6 | conv_rpn_fpn6 | rpn_cls_logits_fpn6(rpn_cls_probs_fpn6) | rpn_bbox_pred_fpn6 | rpn_rois_fpn6 | rpn_roi_probs_fpn6 |
rpn_cls_probs_fpnX 與 rpn_bbox_pred_fpnX 是產生proposal(對應函式為model.GenerateProposals)的輸入,rpn_rois_fpnX 與 rpn_roi_probs_fpnX 是產生proposal的輸出。
完成的內容是:
-
從約20000個anchors中選取概率較大的 12000 個 anchor
-
利用迴歸的位置引數,修正這 12000 個 anchor 的位置,得到 RoIs
-
利用非極大值抑制(Non-maximum suppression, NMS),選出概率最大的 2000 個 RoIs
2.為RPN構建損失
def add_fpn_rpn_losses(model):
    """Add RPN on FPN specific losses.

    For every FPN level this narrows the full-sized RPN label/target blobs
    to the feature-map shape, then attaches the per-level classification
    loss (sigmoid cross-entropy on objectness logits) and box regression
    loss (smooth-L1 on the bbox deltas). Returns a dict of loss gradients.
    """
    loss_gradients = {}
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Blob names for this level's RPN head outputs
        cls_logits = 'rpn_cls_logits_fpn' + slvl
        bbox_pred = 'rpn_bbox_pred_fpn' + slvl
        # Spatially narrow the full-sized RPN label arrays to match the
        # feature map shape
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_fpn' + slvl, cls_logits],
            'rpn_labels_int32_fpn' + slvl
        )
        for key in ('targets', 'inside_weights', 'outside_weights'):
            model.net.SpatialNarrowAs(
                ['rpn_bbox_' + key + '_wide_fpn' + slvl, bbox_pred],
                'rpn_bbox_' + key + '_fpn' + slvl
            )
        # Per-anchor objectness loss, scaled down by the RPN batch size
        # per image and the number of images per batch
        loss_rpn_cls_fpn = model.net.SigmoidCrossEntropyLoss(
            [cls_logits, 'rpn_labels_int32_fpn' + slvl],
            'loss_rpn_cls_fpn' + slvl,
            normalize=0,
            scale=(
                model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
                cfg.TRAIN.IMS_PER_BATCH
            )
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH
        # is handled by (1) setting bbox outside weights and (2)
        # SmoothL1Loss normalizing by IMS_PER_BATCH
        loss_rpn_bbox_fpn = model.net.SmoothL1Loss(
            [
                bbox_pred, 'rpn_bbox_targets_fpn' + slvl,
                'rpn_bbox_inside_weights_fpn' + slvl,
                'rpn_bbox_outside_weights_fpn' + slvl
            ],
            'loss_rpn_bbox_fpn' + slvl,
            beta=1. / 9.,
            scale=model.GetLossScale(),
        )
        loss_gradients.update(
            blob_utils.get_loss_gradients(
                model, [loss_rpn_cls_fpn, loss_rpn_bbox_fpn]
            )
        )
        model.AddLosses(
            ['loss_rpn_cls_fpn' + slvl, 'loss_rpn_bbox_fpn' + slvl]
        )
    return loss_gradients
以P2層為例:
[u'rpn_labels_int32_wide_fpn2', u'rpn_cls_logits_fpn2'] ——> rpn_labels_int32_fpn2
[u'rpn_bbox_targets_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_targets_fpn2
[u'rpn_bbox_inside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_inside_weights_fpn2
[u'rpn_bbox_outside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_outside_weights_fpn2
[u'rpn_cls_logits_fpn2', u'rpn_labels_int32_fpn2'] ——> loss_rpn_cls_fpn2 #RPN的分類損失
[u'rpn_bbox_pred_fpn2', u'rpn_bbox_targets_fpn2', u'rpn_bbox_inside_weights_fpn2', u'rpn_bbox_outside_weights_fpn2'] ——> loss_rpn_bbox_fpn2 #RPN的邊框損失