
Training your own object detection model with the TensorFlow Object Detection API (Part 2)

        In the previous post, "Training your own object detection model with the TensorFlow Object Detection API (Part 1)", I showed how to label a dataset with LabelImg and generate .xml files. After hand-labeling everything myself, I ended up with a dataset of roughly two thousand images.

However, this still does not satisfy the training-data format that the TensorFlow Object Detection API expects (the API requires TFRecord files), so the next step is to convert the .xml files into TFRecord format.

Create a new folder named dataset under the models project; the directory structure is sketched below. The data folder stores the converted data.
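The original screenshot of the directory tree is not reproduced here; as a rough sketch, reconstructed from the paths used later in this post, the layout at this stage is:

models/
└── dataset/
    ├── data/                  # holds the converted CSV / TFRecord files
    ├── xml_to_csv.py          # step 1 (below)
    └── generate_tfrecord.py   # step 2 (below)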

The conversion has two steps:

Step 1: convert the labeled .xml files into CSV files. Create a new file xml_to_csv.py under dataset with the following code:

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET


def xml_to_csv(path):
    xml_list = []
    # Read every annotation file in the directory
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']

    # Split the data into a training set and an evaluation set (here roughly 2:1)
    train_list = xml_list[0: int(len(xml_list) * 0.67)]
    eval_list = xml_list[int(len(xml_list) * 0.67):]

    # Save as CSV
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv('data/train.csv', index=None)
    eval_df.to_csv('data/eval.csv', index=None)


def main():
    # path = 'E:\\data\\Images'
    path = r'E:\smart city\data_distribution\xml_new'  # change path to the folder that contains your .xml files
    xml_to_csv(path)
    print('Successfully converted xml to csv.')


main()

After modifying path, run the script to perform the conversion; it generates two CSV files.

Step 2: convert the generated CSV files into TFRecord format.

Create a new file generate_tfrecord.py under dataset with the following code:

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow as tf

from PIL import Image
# from object_detection.utils import dataset_util
from research.object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS


# Map class names to integer IDs
def class_text_to_int(row_label):
    if row_label == 'plate':
        return 1
    # elif row_label == 'car':
    #     return 2
    # elif row_label == 'person':
    #     return 3
    # elif row_label == 'kite':
    #     return 4
    else:
        # An unrecognized label would otherwise return None and crash
        # int64_list_feature() later, so fail loudly instead.
        raise ValueError('Unknown label: ' + row_label)


def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    # One entry per image: the filename plus all of its annotation rows
    return [data(filename, gb.get_group(filename)) for filename in gb.groups.keys()]


def create_tf_example(group, path):
    print(os.path.join(path, '{}'.format(group.filename)))
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    # NOTE: this assumes the filenames in the CSV have no extension; drop the
    # + '.jpg' if your 'filename' column already includes it.
    filename = (group.filename + '.jpg').encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(csv_input, output_path, imgPath):
    writer = tf.python_io.TFRecordWriter(output_path)
    path = imgPath
    examples = pd.read_csv(csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())

    writer.close()
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':

    # imgPath = 'E:\data\Images'
    imgPath = r'E:\smart city\data_distribution\picyure_new'

    # Generate the train.record file
    output_path = 'data/train.record'
    csv_input = 'data/train.csv'
    main(csv_input, output_path, imgPath)

    # Generate the evaluation file eval.record
    output_path = 'data/eval.record'
    csv_input = 'data/eval.csv'
    main(csv_input, output_path, imgPath)


Two places here need modification:

First place:

Converting class names to IDs:

# Map class names to integer IDs
def class_text_to_int(row_label):
    if row_label == 'plate':
        return 1
    # elif row_label == 'car':
    #     return 2
    # elif row_label == 'person':
    #     return 3
    # elif row_label == 'kite':
    #     return 4
    else:
        raise ValueError('Unknown label: ' + row_label)

Modify this according to the labels you applied in LabelImg. For example, I only have the 'plate' label:

Second place:

# imgPath = 'E:\data\Images'
imgPath = r'E:\smart city\data_distribution\picyure_new'

Change this to the path of your images; the images must correspond one-to-one with the .xml files.

Run generate_tfrecord.py, and you can see that the two TFRecord files have been generated; a quick way to verify them is sketched below.
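As a minimal sanity check (a sketch assuming TensorFlow 1.x, where tf.python_io.tf_record_iterator is available), you can count the records written to each file:

import tensorflow as tf

# Count the serialized examples in each generated record file.
for name in ['data/train.record', 'data/eval.record']:
    count = sum(1 for _ in tf.python_io.tf_record_iterator(name))
    print('{}: {} examples'.format(name, count))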

The data conversion is now complete. Before starting to train the model, the API needs some configuration:

(1) Create a label_map.pbtxt file in the data folder:

with the following content:

item {
  id: 1
  name: 'plate'
}
item {
  id: 2
  name: 'xxx'
}
item {
  id: 3
  name: 'xxx'
}

The id and name entries in this file must match the class_text_to_int() function modified above. If you have multiple detection targets, keep adding item blocks (I only have plate; the two extra items are just examples and are removed for the actual run). Remember to save the .pbtxt file.

(2) Download the specific model you need. Create a fine_tune_model folder under dataset, unpack the downloaded model, and copy the three .ckpt files into fine_tune_model, as listed below.
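For example, with a model from the TensorFlow detection model zoo (the exact archive name depends on which model you pick), the three checkpoint files to copy are:

model.ckpt.data-00000-of-00001
model.ckpt.index
model.ckpt.meta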

(3) Configure the pipeline file

Find research\object_detection\samples\configs\ssd_inception_v2_pets.config and copy it into the dataset/data folder:

Then make the following changes:

First place: lines 8-9

ssd {
    num_classes: 1      # change to the number of classes you are detecting; for example, mine is 1
    box_coder {

Second place: lines 151-152

fine_tune_checkpoint: "E:/code/112/models/dataset/fine_tune_model/model.ckpt"   # remember to change fine_tune_checkpoint
fine_tune_checkpoint_type:  "detection"
Third place: lines 169-175

train_input_reader: {
  tf_record_input_reader {
    # input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
    input_path: "E:/code/112/models/dataset/data/train.record"   # change input_path to the train.record file generated earlier
  }
  label_map_path: "E:/code/112/models/dataset/data/label_map.pbtxt"  # likewise, change this to the path of the label_map.pbtxt file created above
}
Fourth place: lines 184-191

eval_input_reader: {
  tf_record_input_reader {
    input_path: "E:/code/112/models/dataset/data/eval.record"       # change input_path to the eval.record file generated earlier
  }
  label_map_path: "E:/code/112/models/dataset/data/label_map.pbtxt"    # likewise, the path of the label_map.pbtxt file created above
  shuffle: false
  num_readers: 1
}

(4) With the pipeline file configured, go to models\research\object_detection\train.py. If train.py cannot find the packages it imports (for example, the error no module named object_detection), modify the imports as follows:

# from object_detection import trainer
from research.object_detection import trainer
# from object_detection.builders import dataset_builder
from research.object_detection.builders import dataset_builder
# from object_detection.builders import graph_rewriter_builder
from research.object_detection.builders import graph_rewriter_builder
# from object_detection.builders import model_builder
from research.object_detection.builders import model_builder
# from object_detection.utils import config_util
from research.object_detection.utils import config_util
# from object_detection.utils import dataset_util
from research.object_detection.utils import dataset_util

Importing them by their full paths avoids the error. The corresponding models\research\object_detection\trainer.py needs the same treatment:

# from object_detection.builders import optimizer_builder
from research.object_detection.builders import optimizer_builder
# from object_detection.builders import preprocessor_builder
from research.object_detection.builders import preprocessor_builder
# from object_detection.core import batcher
from research.object_detection.core import batcher
# from object_detection.core import preprocessor
from research.object_detection.core import preprocessor
# from object_detection.core import standard_fields as fields
from research.object_detection.core import standard_fields as fields
# from object_detection.utils import ops as util_ops
from research.object_detection.utils import ops as util_ops
# from object_detection.utils import variables_helper
from research.object_detection.utils import variables_helper
# from deployment import model_deploy
from research.slim.deployment import model_deploy

Beyond that, running train.py still raises many more no module named object_detection errors. I fixed every import by hand with its full path, which is tedious; a better approach is to add the parent directory via sys.path.append('path of the parent directory'), as sketched below:

There are also errors such as no module named nets; the nets package lives under research/slim, so importing it by its full path (or adding research/slim to sys.path, as in the sketch below) solves the problem.
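A minimal sketch of the sys.path approach (assuming train.py stays inside research/object_detection and the repository layout described above), which makes both research/ and research/slim/ importable:

import os
import sys

# Resolve paths relative to this file so it works from any working directory.
_BASE = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(_BASE, '..'))           # research/      -> 'object_detection' becomes importable
sys.path.append(os.path.join(_BASE, '..', 'slim'))   # research/slim/ -> 'nets', 'deployment' become importable

With these two lines at the top of train.py, the original imports of the form from object_detection import trainer work unchanged.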

Next, make a few changes in models\research\object_detection\train.py:

flags.DEFINE_string('train_dir', r'E:\code\112\models\dataset\training',               # output directory for the trained model
                    'Directory to save the checkpoints and training summaries.')

flags.DEFINE_string('pipeline_config_path', r'E:\code\112\models\dataset\data\ssd_mobilenet_v2_coco_self.config',    # path of the pipeline config file
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file. If provided, other configs are ignored')

Run train.py and training begins; an equivalent command-line invocation is sketched below.
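Instead of editing the defaults in the file, the same settings can be passed on the command line, using the two flags defined above:

python train.py --train_dir=E:\code\112\models\dataset\training --pipeline_config_path=E:\code\112\models\dataset\data\ssd_mobilenet_v2_coco_self.config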

Training results:

Running the code raised an exception. The error message was:

INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.ResourceExhaustedError'>, OOM when allocating tensor with shape[1200,1600,3] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
     [[Node: RandomHorizontalFlip/cond/flip_left_right/ReverseV2 = ReverseV2[T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](RandomHorizontalFlip/cond/Switch_1:1, RandomHorizontalFlip/cond/flip_left_right/ReverseV2/axis, ^RandomHorizontalFlip/cond/flip_left_right/assert_positive/assert_less/Assert/Assert)]]

Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

After some searching, the cause appears to be memory exhaustion (OOM); my laptop simply cannot run this model.

So I borrowed a gaming laptop with a GTX 1070 and 16 GB of memory and ran it there:

The model files generated under the training folder (sketched below):
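The screenshot is not reproduced; as standard TensorFlow checkpoint output, the training directory should contain files along these lines, with the step numbers depending on how long you train:

checkpoint
graph.pbtxt
model.ckpt-1500.data-00000-of-00001
model.ckpt-1500.index
model.ckpt-1500.meta
events.out.tfevents.*          # TensorBoard event file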

(5) Export the training result and generate the .pb file:

Copy the export_inference_graph.py file into the dataset folder. Its full content is:

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

r"""Tool to export an object detection model for inference.

Prepares an object detection tensorflow graph for inference using model
configuration and an optional trained checkpoint. Outputs inference
graph, associated checkpoint files, a frozen inference graph and a
SavedModel (https://tensorflow.github.io/serving/serving_basic.html).

The inference graph contains one of three input nodes depending on the user
specified option.
  * `image_tensor`: Accepts a uint8 4-D tensor of shape [None, None, None, 3]
  * `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None]
    containing encoded PNG or JPEG images. Image resolutions are expected to be
    the same if more than 1 image is provided.
  * `tf_example`: Accepts a 1-D string tensor of shape [None] containing
    serialized TFExample protos. Image resolutions are expected to be the same
    if more than 1 image is provided.

and the following output nodes returned by the model.postprocess(..):
  * `num_detections`: Outputs float32 tensors of the form [batch]
      that specifies the number of valid boxes per image in the batch.
  * `detection_boxes`: Outputs float32 tensors of the form
      [batch, num_boxes, 4] containing detected boxes.
  * `detection_scores`: Outputs float32 tensors of the form
      [batch, num_boxes] containing class scores for the detections.
  * `detection_classes`: Outputs float32 tensors of the form
      [batch, num_boxes] containing classes for the detections.
  * `detection_masks`: Outputs float32 tensors of the form
      [batch, num_boxes, mask_height, mask_width] containing predicted instance
      masks for each box if its present in the dictionary of postprocessed
      tensors returned by the model.

Notes:
 * This tool uses `use_moving_averages` from eval_config to decide which
   weights to freeze.

Example Usage:
--------------
python export_inference_graph \
    --input_type image_tensor \
    --pipeline_config_path path/to/ssd_inception_v2.config \
    --trained_checkpoint_prefix path/to/model.ckpt \
    --output_directory path/to/exported_model_directory

The expected output would be in the directory
path/to/exported_model_directory (which is created if it does not exist)
with contents:
 - graph.pbtxt
 - model.ckpt.data-00000-of-00001
 - model.ckpt.info
 - model.ckpt.meta
 - frozen_inference_graph.pb
 + saved_model (a directory)

Config overrides (see the `config_override` flag) are text protobufs
(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
certain fields in the provided pipeline_config_path.  These are useful for
making small changes to the inference graph that differ from the training or
eval config.

Example Usage (in which we change the second stage post-processing score
threshold to be 0.5):

python export_inference_graph \
    --input_type image_tensor \
    --pipeline_config_path path/to/ssd_inception_v2.config \
    --trained_checkpoint_prefix path/to/model.ckpt \
    --output_directory path/to/exported_model_directory \
    --config_override " \
            model{ \
              faster_rcnn { \
                second_stage_post_processing { \
                  batch_non_max_suppression { \
                    score_threshold: 0.5 \
                  } \
                } \
              } \
            }"
"""
import tensorflow as tf
from google.protobuf import text_format
from research.object_detection import exporter
from research.object_detection.protos import pipeline_pb2

slim = tf.contrib.slim
flags = tf.app.flags

flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
                    'one of [`image_tensor`, `encoded_image_string_tensor`, '
                    '`tf_example`]')
flags.DEFINE_string('input_shape', None,
                    'If input_type is `image_tensor`, this can explicitly set '
                    'the shape of this input tensor to a fixed size. The '
                    'dimensions are to be provided as a comma-separated list '
                    'of integers. A value of -1 can be used for unknown '
                    'dimensions. If not specified, for an `image_tensor, the '
                    'default shape will be partially specified as '
                    '`[None, None, None, 3]`.')
flags.DEFINE_string('pipeline_config_path', r'E:\code\112\models\dataset\data\ssd_mobilenet_v2_coco_self.config',
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file.')
flags.DEFINE_string('trained_checkpoint_prefix', r'E:\code\112\models\dataset\training\model.ckpt-1500',
                    'Path to trained checkpoint, typically of the form '
                    'path/to/model.ckpt')
flags.DEFINE_string('output_directory', r'E:\code\112\models\dataset\data\frozen_inference_graph.pb', 'Path to write outputs.')
flags.DEFINE_string('config_override', '',
                    'pipeline_pb2.TrainEvalPipelineConfig '
                    'text proto to override pipeline_config_path.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
FLAGS = flags.FLAGS


def main(_):
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
    text_format.Merge(f.read(), pipeline_config)
  text_format.Merge(FLAGS.config_override, pipeline_config)
  if FLAGS.input_shape:
    input_shape = [
        int(dim) if dim != '-1' else None
        for dim in FLAGS.input_shape.split(',')
    ]
  else:
    input_shape = None
  exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
                                  FLAGS.trained_checkpoint_prefix,
                                  FLAGS.output_directory, input_shape)


if __name__ == '__main__':
  tf.app.run()

The following flag definitions need to be modified:

input_type: image_tensor

flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
                    'one of [`image_tensor`, `encoded_image_string_tensor`, '
                    '`tf_example`]')

pipeline_config_path:   models\dataset\data\ssd_inception_v2_pets.config     # the path of the pipeline file configured above

flags.DEFINE_string('pipeline_config_path', r'E:\code\112\models\dataset\data\ssd_inception_v2_pets.config',
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file.')

trained_checkpoint_prefix: the model checkpoint generated in the training folder.

flags.DEFINE_string('trained_checkpoint_prefix', r'E:\code\112\models\dataset\training\model.ckpt-1500',
                    'Path to trained checkpoint, typically of the form '
                    'path/to/model.ckpt')

output_directory: where the generated .pb file is written

flags.DEFINE_string('output_directory', r'E:\code\112\models\dataset\data\frozen_inference_graph.pb', 'Path to write outputs.')

Next, run export_inference_graph.py:
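Because the flag defaults above already point at the config, checkpoint, and output paths, a plain invocation should suffice:

python export_inference_graph.py

Alternatively, pass --pipeline_config_path, --trained_checkpoint_prefix, and --output_directory explicitly, as in the example usage in the docstring.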

Finally, the trained model can be tested:

Copy object_detection_tutorial.py, the test file configured for the Object Detection API in the previous post, into the dataset folder, and create a tesi_images folder under dataset to hold the test images. The complete object_detection_tutorial.py code:

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image


# # This is needed to display the images.
# %matplotlib inline

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")

# from utils import label_map_util
# from utils import visualization_utils as vis_util
from research.object_detection.utils import label_map_util
from research.object_detection.utils import visualization_utils as vis_util

# What model to download.
# MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'     # model-download code; we no longer need to download, so it stays commented out
# MODEL_FILE = MODEL_NAME + '.tar.gz'
# DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = r'E:\code\112\models\dataset\data\frozen_inference_graph.pb\frozen_inference_graph.pb'   # path of the .pb file generated in the previous step

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = r'E:\code\112\models\dataset\data\label_map.pbtxt'            # path of the pbtxt file created earlier

NUM_CLASSES = 1      # I have one class; change this to your own number of classes

# download model                 # also model-download code; keep it commented out
# opener = urllib.request.URLopener()
# downloads the model; if it is already downloaded, the line below can stay commented out
# opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
# tar_file = tarfile.open(MODEL_FILE)
# for file in tar_file.getmembers():
#     file_name = os.path.basename(file.name)
#     if 'frozen_inference_graph.pb' in file_name:
#         tar_file.extract(file, os.getcwd())

# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)


# Helper code
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


# For the sake of simplicity we will use only 2 images:   # test images are expected to follow the naming rule image<n>.jpg
# image1.jpg
# image2.jpg

# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = r'E:\code\112\models\dataset\tesi_images'    # folder holding the test images
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 22)]  # adjust range(1, n + 1) so n is the number of test images

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in TEST_IMAGE_PATHS:
            image = Image.open(image_path)
            # the array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection (the input and output tensors were already fetched above, outside the loop).
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            plt.figure(figsize=IMAGE_SIZE)
            plt.imshow(image_np)
            plt.show()

Modify the highlighted parts above as needed, then run object_detection_tutorial.py. My test results (a few screenshots):

The results are decent. That wraps up this introduction to training your own object detection model with the TensorFlow Object Detection API. :)

Finally, the complete project directory:
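The directory screenshot is not reproduced; reconstructed from the paths used throughout this post, the final layout is roughly:

models/
└── dataset/
    ├── data/
    │   ├── train.csv, eval.csv
    │   ├── train.record, eval.record
    │   ├── label_map.pbtxt
    │   ├── ssd_mobilenet_v2_coco_self.config
    │   └── frozen_inference_graph.pb/        # export output directory
    ├── fine_tune_model/                      # pre-trained .ckpt files
    ├── training/                             # checkpoints produced by train.py
    ├── tesi_images/                          # test images image1.jpg ... image21.jpg
    ├── xml_to_csv.py
    ├── generate_tfrecord.py
    ├── export_inference_graph.py
    └── object_detection_tutorial.py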