1. 程式人生 > >將自己手動標註的資料集(PascalVOC格式)轉化為.TFRecord格式

將自己手動標註的資料集(PascalVOC格式)轉化為.TFRecord格式

 

“ 一個人如果不能學會遺忘,那將是很痛苦的事,別再自尋煩惱,快把痛苦的事給忘了吧!”

 

為了能夠使用Object Detection API~

需要將資料集格式轉化為.TFRecord再進行訓練~

至於,

如何使用Tensorflow官方的Object Detection API

包括下載、依賴(protobuf等)安裝、跑demo、訓練自己的資料過程~

推薦一系列博文(共三篇):

1. https://blog.csdn.net/rookie_wei/article/details/81143814

2. https://blog.csdn.net/rookie_wei/article/details/81210499

3. https://blog.csdn.net/rookie_wei/article/details/81275663

整個過程比較詳細,可以參考~

本篇主要介紹如何將已標註好的資料集轉化成Tensorflow通用的.TFRecord格式~

 

注意:本程式中的VOC_LABELS是我自己要檢測的類別(含背景共6類),請根據自己的資料集修改!

#-*- coding=utf-8 -*-
# File Name: Create_TFRecord.py
# Author: HZ
# Created Time: 2018-06-06 
import os
import sys
import random
 
import numpy as np
import tensorflow as tf
 
import xml.etree.ElementTree as ET #操作xml檔案
 
# Label table with six entries (background included); adapt it to the
# classes annotated in your own images.
# Maps the annotation name to (integer class id, display name).
_VOC_CLASS_NAMES = (
    ('none', 'Background'),
    ('person', 'Person'),
    ('car', 'Car'),
    ('bus', 'Bus'),
    ('truck', 'Truck'),
    ('cyclist', 'cyclist'),
)
VOC_LABELS = {
    key: (class_id, display)
    for class_id, (key, display) in enumerate(_VOC_CLASS_NAMES)
}

# Folders in which the annotations and the images are stored.
DIRECTORY_ANNOTATIONS = 'Annotations/'
DIRECTORY_IMAGES = 'JPEGImages/'

# Random seed.
RANDOM_SEED = 4242
 
# Helpers that build integer, float and bytes features.
def int64_feature(value):
    """Wrap `value` in a tf.train.Feature holding an Int64List.

    A value that is not already a list is placed in a one-element list.
    """
    values = value if isinstance(value, list) else [value]
    int_list = tf.train.Int64List(value=values)
    return tf.train.Feature(int64_list=int_list)
 
def float_feature(value):
    """Wrap `value` in a tf.train.Feature holding a FloatList.

    A value that is not already a list is placed in a one-element list.
    """
    values = value if isinstance(value, list) else [value]
    float_list = tf.train.FloatList(value=values)
    return tf.train.Feature(float_list=float_list)
 
def bytes_feature(value):
    """Wrap `value` in a tf.train.Feature holding a BytesList.

    A value that is not already a list is placed in a one-element list.
    """
    values = value if isinstance(value, list) else [value]
    bytes_list = tf.train.BytesList(value=values)
    return tf.train.Feature(bytes_list=bytes_list)
 
# Image processing.
def _read_flag(obj, tag):
    """Return the integer stored in child element `tag` of `obj`, or 0.

    The child is compared against None explicitly: an ElementTree element
    without children is falsy, so a plain truth test would treat an
    existing `<difficult>1</difficult>` node as missing.
    """
    node = obj.find(tag)
    if node is None or node.text is None:
        return 0
    return int(node.text)


def _process_image(directory, name):
    """Load one image and the objects listed in its XML annotation.

    Args:
        directory: Dataset root; the image is read from
            DIRECTORY_IMAGES and the annotation from
            DIRECTORY_ANNOTATIONS below it.
        name: File name shared by the .jpg and the .xml, without extension.

    Returns:
        A tuple (image_data, shape, bboxes, labels, labels_text, difficult,
        truncated). `image_data` is the raw content of the .jpg file and
        `shape` is [height, width, depth] as read from the XML. The other
        five lists hold one entry per kept object; each box is
        (ymin, xmin, ymax, xmax) divided by the image height / width.

    Raises:
        KeyError: if an object name is not a key of VOC_LABELS.
    """
    # Read the image file; the context manager closes the handle.
    filename = os.path.join(directory, DIRECTORY_IMAGES, name + '.jpg')
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        image_data = image_file.read()

    # Read the XML annotation file.
    filename = os.path.join(directory, DIRECTORY_ANNOTATIONS, name + '.xml')
    tree = ET.parse(filename)
    root = tree.getroot()

    # Image shape.
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]

    # Find annotations.
    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        label = obj.find('name').text
        label_id = int(VOC_LABELS[label][0])
        label_bytes = label.encode('ascii')  # stored as ASCII bytes

        bbox = obj.find('bndbox')
        ymin = float(bbox.find('ymin').text) / shape[0]
        xmin = float(bbox.find('xmin').text) / shape[1]
        ymax = float(bbox.find('ymax').text) / shape[0]
        xmax = float(bbox.find('xmax').text) / shape[1]

        # Only boxes whose normalised height and width are both below 1 are
        # kept. The decision is taken before anything is appended so that
        # every per-object list stays aligned with `bboxes`.
        if not (abs(ymax - ymin) < 1 and abs(xmax - xmin) < 1):
            continue

        bboxes.append((ymin, xmin, ymax, xmax))
        labels.append(label_id)
        labels_text.append(label_bytes)
        difficult.append(_read_flag(obj, 'difficult'))
        truncated.append(_read_flag(obj, 'truncated'))

    return image_data, shape, bboxes, labels, labels_text, difficult, truncated
 
# Convert one sample into an Example.
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build the tf.train.Example describing one annotated image.

    Args:
        image_data: Encoded image content, stored under 'image/encoded'.
        labels: Integer class ids, one per object.
        labels_text: Class names as bytes, one per object.
        bboxes: Iterable of 4-item boxes ordered (ymin, xmin, ymax, xmax).
        shape: Sequence indexed as [height, width, channels].
        difficult: Integer 'difficult' flags, one per object.
        truncated: Integer 'truncated' flags, one per object.

    Returns:
        The assembled tf.train.Example.
    """
    ymin, xmin, ymax, xmax = [], [], [], []
    columns = (ymin, xmin, ymax, xmax)
    for box in bboxes:
        assert len(box) == 4
        # Spread the four coordinates over their per-coordinate lists.
        for column, coordinate in zip(columns, box):
            column.append(coordinate)

    image_format = b'JPEG'
    feature = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
# Append one sample to the TFRecord.
def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Load image `name` from `dataset_dir` and write it as one record.

    Args:
        dataset_dir: Dataset root passed on to _process_image.
        name: Image name without extension.
        tfrecord_writer: Object whose write() receives the serialized
            Example.
    """
    (image_data, shape, bboxes, labels, labels_text,
     difficult, truncated) = _process_image(dataset_dir, name)
    record = _convert_to_example(image_data, labels, labels_text,
                                 bboxes, shape, difficult, truncated)
    serialized = record.SerializeToString()
    tfrecord_writer.write(serialized)
# `name` is the prefix of the converted file.
def _get_output_filename(output_dir, name, idx):
    """Return '<output_dir>/<name>_<idx>.tfrecord', idx zero-padded to 3 digits."""
    basename = '%s_%03d.tfrecord' % (name, idx)
    return '%s/%s' % (output_dir, basename)

    
def run(dataset_dir, output_dir, name='voc_train', shuffling=False):
    """Convert every annotated image of a dataset into one TFRecord file.

    Args:
        dataset_dir: Dataset root; the entries of its DIRECTORY_ANNOTATIONS
            sub-directory determine which images are converted.
        output_dir: Directory receiving the .tfrecord file; it is created
            when it does not exist yet.
        name: Prefix of the output file name.
        shuffling: When true, the file list is shuffled with RANDOM_SEED
            instead of being processed in sorted order.
    """
    # The output file is written below output_dir, so that is the directory
    # that has to exist before the writer is opened.
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS)
    filenames = sorted(os.listdir(path))  # deterministic order
    # Shuffle the order when shuffling is true.
    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    total = len(filenames)
    # No output file is opened when there is nothing to convert.
    if total:
        tf_filename = _get_output_filename(output_dir, name, 0)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            for i, filename in enumerate(filenames):
                # Progress report on the terminal, flushed immediately.
                sys.stdout.write(' Converting image %d/%d \n' % (i + 1, total))
                sys.stdout.flush()

                # Strip the extension whatever its length.
                img_name = os.path.splitext(filename)[0]
                _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer)

    print('\nFinished converting the Pascal VOC dataset!')
 
# Source dataset path, output path and output file name prefix.
dataset_dir = "./VOC2007/"
output_dir = "./TFRecords"
name = "voc_train"


def main(_):
    """Run the conversion with the module-level settings; the argument is unused."""
    run(dataset_dir, output_dir, name=name)


if __name__ == '__main__':
    tf.app.run()
    

 

在獲得訓練好的模型之後,進行檢測時使用的demo.py如下: (較好)

#encoding:utf-8
import tensorflow as tf
import numpy as np
 
import os
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_utils
 
# Directory of the downloaded model.
MODEL_DIR = 'object_detection/ssd_mobilenet_v1_coco_2018_01_28'
# Model file inside the downloaded model directory.
MODEL_CHECK_FILE = os.path.join(MODEL_DIR, 'frozen_inference_graph.pb')
# Label map that belongs to the dataset.
MODEL_LABEL_MAP = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt')
# Number of classes in the dataset; see mscoco_label_map.pbtxt.
MODEL_NUM_CLASSES = 90

# File names of the sample images (image1.jpg ... image5.jpg), kept in a list.
PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
TEST_IMAGES_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image%d.jpg' % index)
    for index in range(1, 6)
]

# Size of the displayed figure, in inches.
IMAGE_SIZE = (12, 8)
 
tf.reset_default_graph()

# Read the model file and import it into the default graph.
with tf.gfile.GFile(MODEL_CHECK_FILE, 'rb') as fd:
    serialized_graph = fd.read()
    _graph = tf.GraphDef()
    _graph.ParseFromString(serialized_graph)
tf.import_graph_def(_graph, name='')

# Load the COCO labels: the content of mscoco_label_map.pbtxt is turned into
# {1: {'id': 1, 'name': u'person'}...90: {'id': 90, 'name': u'toothbrush'}}
label_map = label_map_util.load_labelmap(MODEL_LABEL_MAP)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=MODEL_NUM_CLASSES,
)
category_index = label_map_util.create_category_index(categories)
 
# Convert an image into a numpy array.
def load_image_into_numpy_array(image):
    """Return the pixels of a PIL image as a (height, width, 3) uint8 array.

    The image is converted to RGB first, so grayscale, palette and RGBA
    inputs also yield three channels instead of failing on a fixed
    three-channel reshape. np.array() copies the pixel data, so the caller
    can draw on the result in place.

    Args:
        image: PIL image object providing convert().

    Returns:
        A new numpy array of dtype uint8.
    """
    return np.array(image.convert('RGB'), dtype=np.uint8)
 
# Run the computation in the graph.
detection_graph = tf.get_default_graph()

# The tensors below are looked up by name in the imported model. They do not
# depend on the image, so they are fetched once instead of once per image.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Holds all detection boxes.
boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
# Confidence of each detection.
scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
# Class that belongs to each box.
classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of detection boxes.
num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
fetches = [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor]

with tf.Session(graph=detection_graph) as sess:
    for image_path in TEST_IMAGES_PATHS:
        print(image_path)
        # Read the image and turn its data into an array; the file is closed
        # as soon as the pixels have been copied.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)
        # Add a leading dimension.
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Run the detection.
        (boxes, scores, classes, num_detections) = sess.run(
            fetches,
            feed_dict={image_tensor: image_np_expanded})
        # Print the detection results.
        print(num_detections)
        print(boxes)
        print(classes)
        print(scores)

        # Draw the results onto the image array.
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8
        )
        # Display.
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()

恩,複習+鞏固!

sweet~