1. 程式人生 > >Kitti資料集轉成VOC(xml)資料集格式

Kitti資料集轉成VOC(xml)資料集格式

本文將kitti資料集中的檢測部分的資料標註轉換成voc格式的xml,用於生成caffe-ssd訓練的lmdb資料,具體資料集自己下載,標註圖片大概七千多張。

以下程式將kitti格式的標註及圖片轉成voc格式的標註及圖片:DontCare標註不會寫入xml,且圖片中對應的DontCare區域會被塗黑(像素值設為0)。
具體程式碼如下:

#!/usr/bin/env python
# -*- coding: utf8 -*-
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree
import codecs
import os
import cv2

# Extension appended to the image filename when save() is called without a target path.
XML_EXT = '.xml'
# Encoding used both for the pretty-printed XML bytes and for the output file.
ENCODE_METHOD = 'utf-8'

class PascalVocWriter:
    """Collect bounding boxes for one image and write them as a Pascal VOC XML file.

    Typical use: construct, call addBndBox() once per object, then save().
    """

    def __init__(self, foldername, filename, imgSize,databaseSrc='Unknown', localImgPath=None):
        """
            Store the annotation header fields.

            foldername   -- text of the <folder> element
            filename     -- text of the <filename> element; also the default
                            output name (plus XML_EXT) used by save()
            imgSize      -- sequence indexed as (height, width[, depth]);
                            depth is written as 1 unless the sequence has 3 items
            databaseSrc  -- text of <source>/<database>
            localImgPath -- optional text of the <path> element
        """
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize
        self.boxlist = []
        self.localImgPath = localImgPath
        self.verified = False

    def prettify(self, elem):
        """
            Return a pretty-printed XML byte string for the Element.
        """
        rough_string = ElementTree.tostring(elem, 'utf8')
        root = etree.fromstring(rough_string)
        pretty = etree.tostring(root, pretty_print=True, encoding=ENCODE_METHOD)
        # Every pair of spaces becomes a tab; note this is a plain byte
        # replacement, so pairs of spaces inside text content are affected too.
        return pretty.replace("  ".encode(), "\t".encode())

    def genXML(self):
        """
            Return the <annotation> root element (header only, no objects),
            or None when filename, foldername or imgSize is missing.
        """
        # Check conditions
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None:
            return None

        top = Element('annotation')
        if self.verified:
            top.set('verified', 'yes')

        folder = SubElement(top, 'folder')
        folder.text = self.foldername

        filename = SubElement(top, 'filename')
        filename.text = self.filename

        if self.localImgPath is not None:
            localImgPath = SubElement(top, 'path')
            localImgPath.text = self.localImgPath

        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc

        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        # imgSize is ordered (height, width[, depth]).
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        if len(self.imgSize) == 3:
            depth.text = str(self.imgSize[2])
        else:
            depth.text = '1'

        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult):
        """
            Queue one object box; it is written out by appendObjects().
        """
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
        bndbox['name'] = name
        bndbox['difficult'] = difficult
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        """
            Append one <object> element to *top* for every queued box.
        """
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            try:
                name.text = unicode(each_object['name'])
            except NameError:
                # Py3: NameError: name 'unicode' is not defined
                name.text = each_object['name']
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"
            truncated = SubElement(object_item, 'truncated')
            # A box is flagged truncated when its max coordinate equals the
            # image height/width or its min coordinate equals 1.
            touches_edge = (
                int(each_object['ymax']) == int(self.imgSize[0])
                or int(each_object['ymin']) == 1
                or int(each_object['xmax']) == int(self.imgSize[1])
                or int(each_object['xmin']) == 1
            )
            truncated.text = "1" if touches_edge else "0"
            difficult = SubElement(object_item, 'difficult')
            # Any truthy value is written as "1", anything else as "0".
            difficult.text = str(int(bool(each_object['difficult'])))
            bndbox = SubElement(object_item, 'bndbox')
            for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                SubElement(bndbox, key).text = str(each_object[key])

    def save(self, targetFile=None):
        """
            Write the annotation to *targetFile* (default: filename + XML_EXT).

            Raises ValueError when filename, foldername or imgSize is None,
            i.e. when genXML() cannot build a root element.
        """
        root = self.genXML()
        if root is None:
            # Fail before touching the filesystem instead of truncating the
            # target file and then crashing during serialization.
            raise ValueError(
                "cannot save annotation: filename, foldername and imgSize must all be set")
        self.appendObjects(root)

        # Serialize first so a failure never leaves an empty or partial file.
        xml_text = self.prettify(root).decode('utf8')

        if targetFile is None:
            targetFile = self.filename + XML_EXT
        with codecs.open(targetFile, 'w', encoding=ENCODE_METHOD) as out_file:
            out_file.write(xml_text)

class kitti2voc:
    """
        Convert KITTI detection labels and images to Pascal VOC XML annotations
        and images. "DontCare" boxes are left out of the XML and the matching
        image regions are set to 0 (blacked out).
    """
    def __init__(self,image_path_src , image_path_dst , kitti_anat_path , voc_anat_path):
        """
            Remember the four directories used by convert_process().

            image_path_src  -- directory holding the KITTI .png images
            image_path_dst  -- directory that receives the masked images
            kitti_anat_path -- directory holding the KITTI .txt label files
            voc_anat_path   -- directory that receives the VOC .xml files
        """
        self.image_path_src = image_path_src
        self.image_path_dst = image_path_dst
        self.kitti_anat_path = kitti_anat_path
        self.voc_anat_path   = voc_anat_path

    @staticmethod
    def _ensure_dir(path):
        """Create *path* if it is non-empty and does not exist yet."""
        if path and not os.path.isdir(path):
            os.makedirs(path)

    def convert_process(self):
        """
            Convert every .txt label file found in kitti_anat_path.

            For each label file the image with the same stem and a .png
            extension is read from image_path_src. An XML file and the masked
            image are written only when at least one non-DontCare object
            exists. Label files whose image cannot be read are reported and
            skipped.
        """
        filter_item = ["DontCare"]
        for filename in os.listdir(self.kitti_anat_path):
            # Anything that is not a label file (README, hidden files, ...)
            # has no matching image and must not be treated as one.
            if not filename.endswith(".txt"):
                continue
            print(filename)
            stem = os.path.splitext(filename)[0]
            image_name = stem + ".png"
            # Use the instance attributes, not same-named module globals, so
            # the class also works when imported from another module.
            image_file = os.path.join(self.image_path_src, image_name)
            img = cv2.imread(image_file)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                print("skip {}: cannot read image {}".format(filename, image_file))
                continue
            image_shape = img.shape  # (height, width, channels)
            PascalVocWriter_ins = PascalVocWriter(self.voc_anat_path, image_name, image_shape)
            count = 0
            with open(os.path.join(self.kitti_anat_path, filename), 'r') as kitti_ant:
                for line in kitti_ant:
                    item_list = line.split()
                    if not item_list:
                        continue  # tolerate blank lines
                    # Fields 4..7 of a label line are the box: left, top, right, bottom.
                    xmin, ymin, xmax, ymax = (int(float(v)) for v in item_list[4:8])
                    if item_list[0] not in filter_item:
                        count += 1
                        PascalVocWriter_ins.addBndBox(xmin, ymin, xmax, ymax, item_list[0], 0)
                    else:
                        # Clamp at 0 so a negative coordinate cannot turn
                        # into a from-the-end slice index.
                        img[max(ymin, 0):max(ymax, 0), max(xmin, 0):max(xmax, 0)] = 0

            if count > 0:
                self._ensure_dir(self.voc_anat_path)
                self._ensure_dir(self.image_path_dst)
                PascalVocWriter_ins.save(os.path.join(self.voc_anat_path, stem + ".xml"))
                image_out = os.path.join(self.image_path_dst, image_name)
                # cv2.imwrite reports failure through its return value only.
                if not cv2.imwrite(image_out, img):
                    print("warning: failed to write image {}".format(image_out))


if __name__ =='__main__':
    # Point the four paths below at your own directories, then run
    # 'python convert_k_x.py' (written for Python 3.6).
    # Inputs: KITTI images and KITTI label files.
    image_path_src = "/media/wq/新加捲/Dataset/KITTI/data_object_image_2/training/image_2/"
    kitti_anat_path= "/media/wq/新加捲/Dataset/KITTI/training/label_2/"
    # Outputs: masked images and VOC xml annotations.
    image_path_dst = "./image_dst/"
    voc_anat_path = "/media/wq/新加捲/Dataset/kitti_voc/Annotations/"

    kitti2voc_ins = kitti2voc(
        image_path_src=image_path_src,
        image_path_dst=image_path_dst,
        kitti_anat_path=kitti_anat_path,
        voc_anat_path=voc_anat_path,
    )
    kitti2voc_ins.convert_process()
    print ("convert over")

需要更改的部分就是四處:
image_path_src:kitti圖片路徑
image_path_dst:想要生成的voc圖片路徑
kitti_anat_path:kitti標註資訊路徑
voc_anat_path:voc標註資訊路徑
改成你自己的路徑就可以了。
另外需要注意的是,kitti的圖片格式為png,而voc的圖片格式一般為jpg;上面的程式輸出的圖片仍然是png(xml中的filename也是.png),若後續流程需要jpg,請自行轉換。

有了圖片和標註資訊後,其他的處理,在網上就比較多了,若有問題,歡迎交流。