1. 程式人生 > >python 將yaml標籤檔案轉化為xml格式的標籤檔案

python 將yaml標籤檔案轉化為xml格式的標籤檔案

最近在跑SSD和Faster R-CNN深度學習程式碼,下載了一些資料集,但是這些資料集標籤檔案不是xml格式檔案,而是yaml檔案,雖然網上有線上轉化的工具,但是這種做法對我來說顯然是很低效率的。為了提高效率,自己寫了相關的程式碼。現在分享給大家。

感謝Bosch Small Traffic Lights Dataset 提供的部分程式碼。

以下是label_images.py程式:

#!/usr/bin/env python
# The module docstring doubles as the usage text printed by the __main__
# guard, so it must start at column 0 (an indented string literal here is
# an "unexpected indent" syntax error).
"""
Example usage:
    python label_images.py input.yaml [output_folder]
"""
import sys
import os
import cv2
from read_label_file import get_all_labels
from WriteFileXml import writeInfoToXml

def ir(some_value):
    """Round ``some_value`` to the nearest whole number and return an ``int``.

    Convenient when a floating-point coordinate has to be used as an
    array index.
    """
    rounded = round(some_value)
    return int(rounded)

def show_label_images(input_yaml, output_folder=None):
    """Convert every annotation in a YAML label file into one XML file per image.

    Despite its name this function displays nothing: for each entry returned
    by ``get_all_labels`` it reads the image to obtain its shape and hands
    the entry to ``writeInfoToXml``.

    Args:
        input_yaml: Path of the YAML label file, passed to ``get_all_labels``.
        output_folder: Directory that receives the XML files. It is created
            when it does not exist. When ``None``, the current directory is
            used.

    Raises:
        IOError: If an image listed in the label file cannot be read.
    """
    images = get_all_labels(input_yaml)

    if output_folder is None:
        # Fall back to the current directory so the writer always receives
        # a real path component.
        output_folder = os.curdir
    elif not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for image_dict in images:
        image_path = image_dict['path']
        image = cv2.imread(image_path)

        # The read result must be validated BEFORE touching ``.shape``;
        # otherwise an unreadable image fails with AttributeError instead
        # of the intended IOError.
        if image is None:
            raise IOError('Could not open image path', image_path)

        image_name = os.path.basename(image_path)
        # Strip only the final extension so names containing extra dots
        # (e.g. "cam.0001.png") keep a unique stem ("cam.0001.xml").
        xml_name = os.path.splitext(image_name)[0] + '.xml'
        # Convert this image's YAML entry to an XML label file.
        writeInfoToXml(xml_name, image_name, image_dict, image.shape, output_folder)


if __name__ == '__main__':
    # Everything after the script name; the YAML path is mandatory.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(__doc__)
        sys.exit(-1)

    label_file = cli_args[0]
    # Optional second argument: directory in which the XML files are stored.
    output_folder = cli_args[1] if len(cli_args) >= 2 else None
    show_label_images(label_file, output_folder)

下面是WriteFileXml.py程式,該程式用於生成xml格式的標籤檔案,格式仿照PASCAL VOC資料集的標籤格式。

# -*- coding: utf-8 -*-
# @Time    : 18-5-23 上午6:58
# @Author  : lei liu
# @Blog    :https://blog.csdn.net/T1243_3
# coding=utf-8

from xml.dom.minidom import Document
import os

# Write the annotation info of one image to a local XML file; XmlName is the XML file name.
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def writeInfoToXml(XmlName,imgname,image_dict,img_size,output_folder):
    """Write one image's bounding-box labels as a PASCAL-VOC-style XML file.

    The document has an ``annotation`` root containing ``folder`` (always
    ``VOC2007``), ``filename``, ``size`` (``width``, ``height``, ``depth``)
    and one ``object`` (``name`` plus ``bndbox``) per box.

    Args:
        XmlName: File name of the XML file to create.
        imgname: Image file name stored in the ``filename`` element.
        image_dict: Mapping whose ``'boxes'`` entry is a sequence of
            mappings with the keys ``'label'``, ``'x_min'``, ``'y_min'``,
            ``'x_max'`` and ``'y_max'``.
        img_size: Image shape indexed as ``(height, width[, depth])``.
            A two-element shape is written with a depth of 1.
        output_folder: Directory to write into. ``None`` or an empty string
            means ``XmlName`` is used as the path on its own.
    """
    doc = Document()

    # Root node of the DOM tree.
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'VOC2007')
    _append_text_element(doc, annotation, 'filename', imgname)

    # <size> holds width, height and depth; img_size is (height, width, ...).
    size = doc.createElement('size')
    annotation.appendChild(size)
    # A 2-D shape has no channel axis; record a single channel rather than
    # failing with IndexError.
    depth = img_size[2] if len(img_size) > 2 else 1
    _append_text_element(doc, size, 'width', str(img_size[1]))
    _append_text_element(doc, size, 'height', str(img_size[0]))
    _append_text_element(doc, size, 'depth', str(depth))

    # One <object> per labelled box: its class name and corner coordinates.
    for box in image_dict['boxes']:
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', str(box['label']))

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        for tag, key in (('xmin', 'x_min'), ('ymin', 'y_min'),
                         ('xmax', 'x_max'), ('ymax', 'y_max')):
            _append_text_element(doc, bndbox, tag, str(box[key]))

    # toprettyxml(encoding=...) returns bytes, so the file must be opened in
    # binary mode; text mode raises TypeError on Python 3.
    target_path = os.path.join(output_folder or '', XmlName)
    with open(target_path, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))

if __name__ == '__main__':
    # Manual smoke test. writeInfoToXml needs all five arguments and a dict
    # with a 'boxes' list; the earlier three-argument call with a plain list
    # could never run.
    image_dict = {'boxes': []}
    # Placeholder shape in (height, width, depth) order.
    img_size = (720, 1280, 3)
    writeInfoToXml('test1.xml', '0001.png', image_dict, img_size,
                   '/home/ubuntu/bstld-master')

最後生成的標籤檔案如下面xml檔案所示:

<?xml version="1.0" encoding="utf-8"?>
<annotation>
	<folder>VOC2007</folder>
	<filename>26420.png</filename>
	<size>
		<width>1280</width>
		<height>720</height>
		<depth>3</depth>
	</size>
	<object>
		<name>Green</name>
		<bndbox>
			<xmin>940.25</xmin>
			<ymin>242.625</ymin>
			<xmax>951.0</xmax>
			<ymax>277.25</ymax>
		</bndbox>
	</object>
	<object>
		<name>Green</name>
		<bndbox>
			<xmin>95.0</xmin>
			<ymin>250.0</ymin>
			<xmax>104.5</xmax>
			<ymax>285.625</ymax>
		</bndbox>
	</object>
</annotation>