1. 程式人生 > >初窺Tensorflow Object Detection API 原始碼之(1) FeatureExtractor

初窺Tensorflow Object Detection API 原始碼之(1) FeatureExtractor

models/research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py

overview

class FasterRCNNFeatureExtractor(基類,定義於 faster_rcnn_meta_arch.py)

init

def __init__(self,
             is_training,
             first_stage_features_stride,
             batch_norm_trainable=False,
             reuse_weights=None,
             weight_decay=0.0):
    """Stores the configuration shared by the feature extractors.

    Args:
      is_training: Whether the extractor is being built for training.
      first_stage_features_stride: Stride of the first stage feature map.
        Stored as given; no validation is done here.
      batch_norm_trainable: Whether batch norm may be trained. It only has
        an effect when `is_training` is truthy as well.
      reuse_weights: Stored as given (presumably a variable-scope `reuse`
        flag -- confirm against the subclasses).
      weight_decay: Stored as given (presumably the weight regularization
        strength -- confirm against the subclasses).
    """
    self._is_training = is_training
    self._first_stage_features_stride = first_stage_features_stride
    # Truthy only when the caller allows batch norm training AND the model
    # itself is being trained.
    self._train_batch_norm = (batch_norm_trainable and is_training)
    self._reuse_weights = reuse_weights
    self._weight_decay = weight_decay

preprocess

@abstractmethod
def preprocess(self, resized_inputs):
    """Feature-extractor specific preprocessing (minus image resizing).

    Abstract hook with no behavior of its own; concrete feature extractors
    are expected to override it.

    Args:
      resized_inputs: The inputs to preprocess, already resized.
    """
    pass

extract_proposal_features

def extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    This function is responsible for extracting feature maps from preprocessed
    images.  These features are used by the region proposal network (RPN) to
    predict proposals.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
    """
    # Open the variable scope here so that whatever the overridden
    # `_extract_proposal_features` builds is created under `scope`.
    with tf.variable_scope(scope, values=[preprocessed_inputs]):
      return self._extract_proposal_features(preprocessed_inputs, scope)

_extract_proposal_features

@abstractmethod
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features, to be overridden.

    Abstract hook with no behavior of its own; it is the method that
    `extract_proposal_features` delegates to.

    Args:
      preprocessed_inputs: The preprocessed batch of images.
      scope: A scope name.
    """
    pass

這裡省略幾個函式的介紹
具體可以參考檔案models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py

class FasterRCNNResnetV1FeatureExtractor( faster_rcnn_meta_arch.FasterRCNNFeatureExtractor)

def __init__(self,
             architecture,
             resnet_model,
             is_training,
             first_stage_features_stride,
             batch_norm_trainable=False,
             reuse_weights=None,
             weight_decay=0.0):
    """Sets up a ResNet-v1 based Faster R-CNN feature extractor.

    Args:
      architecture: Stored on the instance as `_architecture`.
      resnet_model: Stored on the instance as `_resnet_model` (presumably
        the callable that builds the network -- confirm against callers).
      is_training: Forwarded to the parent constructor.
      first_stage_features_stride: Must be 8 or 16; forwarded to the parent
        constructor.
      batch_norm_trainable: Forwarded to the parent constructor.
      reuse_weights: Forwarded to the parent constructor.
      weight_decay: Forwarded to the parent constructor.

    Raises:
      ValueError: If `first_stage_features_stride` is neither 8 nor 16.
    """
    supported_strides = (8, 16)
    if first_stage_features_stride not in supported_strides:
      raise ValueError('`first_stage_features_stride` must be 8 or 16.')

    # Record the ResNet specifics before handing the rest to the parent.
    self._resnet_model = resnet_model
    self._architecture = architecture
    parent = super(FasterRCNNResnetV1FeatureExtractor, self)
    parent.__init__(is_training,
                    first_stage_features_stride,
                    batch_norm_trainable,
                    reuse_weights,
                    weight_decay)

preprocess

def preprocess(self, resized_inputs):
    """Subtracts a fixed per-channel mean from the resized inputs.

    Args:
      resized_inputs: The resized images; the last dimension must line up
        with the three channel means.

    Returns:
      `resized_inputs` with the channel means subtracted.
    """
    per_channel_means = [123.68, 116.779, 103.939]
    # Two extra levels of nesting give the means a [1, 1, 3] shape, so the
    # subtraction applies along the last (channel) dimension.
    mean_pixel = [[per_channel_means]]
    return resized_inputs - mean_pixel

VGG 風格的預處理(逐通道減去均值),在後期會被呼叫到

Graph

_extract_proposal_features

提取RPN的Feature輸入

# Excerpt of a method body: `shape_assert`, `preprocessed_inputs` and `scope`
# are defined outside the lines shown here.
# Ops created inside this block get a control dependency on `shape_assert`.
with tf.control_dependencies([shape_assert]):
      # Disables batchnorm for fine-tuning with smaller batch sizes.
      # TODO: Figure out if it is needed when image
      # batch size is bigger.
      with slim.arg_scope(
          resnet_utils.resnet_arg_scope(
              batch_norm_epsilon=1e-5,
              batch_norm_scale=True,
              weight_decay=self._weight_decay)):
        with tf.variable_scope(
            self._architecture, reuse=self._reuse_weights) as var_scope:
          # The first return value is discarded; only `activations`, which
          # is indexed by name below, is used.
          _, activations = self._resnet_model(
              preprocessed_inputs,
              num_classes=None,
              is_training=self._train_batch_norm,
              global_pool=False,
              output_stride=self._first_stage_features_stride,
              spatial_squeeze=False,
              scope=var_scope)

    # Look up the block3 entry, keyed as '<scope>/<architecture>/block3'.
    handle = scope + '/%s/block3' % self._architecture
    return activations[handle]

這裡有幾處細節
細節1:tf.Assert
判斷輸入影象的高和寬是否都不小於33(即尺寸至少為33×33)
細節2:tf.control_dependencies
保證tf.Assert(即程式碼中的shape_assert)先於區塊內的運算執行
細節3:倒數第二行的“block3”
_extract_proposal_features的輸出取的是Resnet101的Block3的輸出

_extract_box_classifier_features

提取box的Feature輸入

def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Stacks a three-unit `block4` on top of the proposal feature maps.

    Args:
      proposal_feature_maps: The feature maps fed into the block4 stack.
      scope: Not referenced by this implementation.

    Returns:
      The result of `resnet_utils.stack_blocks_dense` for the block4 stack.
    """
    with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
      resnet_defaults = resnet_utils.resnet_arg_scope(
          batch_norm_epsilon=1e-5,
          batch_norm_scale=True,
          weight_decay=self._weight_decay)
      with slim.arg_scope(resnet_defaults):
        with slim.arg_scope([slim.batch_norm],
                            is_training=self._train_batch_norm):
          # All three units share one and the same configuration dict.
          unit_config = {
              'depth': 2048,
              'depth_bottleneck': 512,
              'stride': 1
          }
          block4 = resnet_utils.Block(
              'block4', resnet_v1.bottleneck, [unit_config] * 3)
          return resnet_utils.stack_blocks_dense(
              proposal_feature_maps, [block4])

slim.arg_scope和resnet_utils.resnet_arg_scope(詳見slim文件和resnet_utils檔案)
查閱了一下,作用是為slim的conv2d,batch_norm,max_pool2d做了一個引數約定
整個函式的輸出為在_extract_proposal_features的輸出基礎上加了block4(3個unit)

class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor)

def __init__(self,
             is_training,
             first_stage_features_stride,
             batch_norm_trainable=False,
             reuse_weights=None,
             weight_decay=0.0):
    """Initializes the parent extractor with the ResNet-101 (v1) settings.

    The architecture name 'resnet_v1_101' and `resnet_v1.resnet_v1_101` are
    fixed here; every argument is passed through to the parent constructor
    unchanged.

    Args:
      is_training: Passed through to the parent constructor.
      first_stage_features_stride: Passed through to the parent constructor.
      batch_norm_trainable: Passed through to the parent constructor.
      reuse_weights: Passed through to the parent constructor.
      weight_decay: Passed through to the parent constructor.
    """
    parent_init = super(FasterRCNNResnet101FeatureExtractor, self).__init__
    parent_init('resnet_v1_101',
                resnet_v1.resnet_v1_101,
                is_training,
                first_stage_features_stride,
                batch_norm_trainable,
                reuse_weights,
                weight_decay)

用指定引數初始化了父類FasterRCNNResnetV1FeatureExtractor

{
    architecture:resnet_v1_101,
    resnet_model:resnet_v1.resnet_v1_101
}