1. 程式人生 > >【資料集】在批量xml標籤中選出指定標籤和對應圖片

【資料集】在批量xml標籤中選出指定標籤和對應圖片

1. 博主從朋友那裡拿到一個 VOC 資料集,裡面共有 20 個類別,但博主只需要其中 6 個:

classes={"one","two","three","four","five","fist"}

2. 先用 selete_xml_file() 選出含有上述類別的 xml 標註檔,再用 selete_image_file() 把檔名相同的對應圖片也複製出來

# coding=utf-8
"""選出指定標籤的xml檔案"""
#"""選出對應標籤的圖片"""
import os
import os.path
import xml.dom.minidom
import shutil
import sys
# Directory holding the original VOC annotation XML files.
path = "C:\\Users\\AS\\Desktop\\new\\Annotations"
# Directory that receives the annotation files mentioning a wanted class.
newpath = "C:\\Users\\AS\\Desktop\\new\\label"

# The image step reads the annotations selected by the XML step, so both
# names refer to the same directory.
label_path = newpath
# Directory holding every original image.
image_path = "C:\\Users\\AS\\Desktop\\new\\JPEGImages"
# Directory that receives the images matching a selected annotation.
image_new_path = "C:\\Users\\AS\\Desktop\\new\\image"
# Names of all entries in the annotation directory, captured at import time.
# Guarded so that importing the module (or running only the image step) does
# not crash when the annotation directory is absent.
files = os.listdir(path) if os.path.isdir(path) else []
# Class names to keep out of the classes present in the dataset.
classes = {"one", "two", "three", "four", "five", "fist"}
new = []

def selete_xml_file(src_dir=None, dst_dir=None, wanted=None):
    """Copy every annotation XML that mentions a wanted class.

    Each ``*.xml`` file directly inside ``src_dir`` is parsed; if any of its
    ``<name>`` elements has text contained in ``wanted``, the file is copied
    (once) into ``dst_dir`` under the same file name.

    Args:
        src_dir: Directory with the annotation files. Defaults to the
            module-level ``path``.
        dst_dir: Directory receiving the selected files; created if missing.
            Defaults to the module-level ``newpath``.
        wanted: Collection of class names to keep. Defaults to the
            module-level ``classes``.

    Returns:
        The number of annotation files copied.
    """
    src_dir = path if src_dir is None else src_dir
    dst_dir = newpath if dst_dir is None else dst_dir
    wanted = classes if wanted is None else wanted

    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)

    copied = 0
    # List the directory at call time so the result reflects its current
    # contents rather than an import-time snapshot.
    for xml_file in os.listdir(src_dir):
        fp = os.path.join(src_dir, xml_file)
        # Test the joined path: the bare name would be resolved against the
        # current working directory instead of src_dir.
        if os.path.isdir(fp) or not xml_file.lower().endswith(".xml"):
            continue

        root = xml.dom.minidom.parse(fp).documentElement
        # An empty <name/> has no child node; getattr() maps that case (and
        # non-text children) to None instead of raising AttributeError.
        has_wanted = any(
            getattr(node.firstChild, "data", None) in wanted
            for node in root.getElementsByTagName("name")
        )
        if has_wanted:
            # Copy once per file, however many objects match.
            shutil.copyfile(fp, os.path.join(dst_dir, xml_file))
            copied += 1
            print(copied)
    return copied

def selete_image_file(labels_dir=None, images_dir=None, out_dir=None):
    """Copy every image whose base name matches a selected annotation file.

    The base name is the file name without its final extension, so
    ``img.v2.xml`` matches ``img.v2.jpg`` but not ``img.v1.jpg``.

    Args:
        labels_dir: Directory with the selected annotation files. Defaults to
            the module-level ``label_path``.
        images_dir: Directory with all candidate images. Defaults to the
            module-level ``image_path``.
        out_dir: Directory receiving the matching images; created if missing.
            Defaults to the module-level ``image_new_path``.

    Returns:
        The number of images copied.
    """
    labels_dir = label_path if labels_dir is None else labels_dir
    images_dir = image_path if images_dir is None else images_dir
    out_dir = image_new_path if out_dir is None else out_dir

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # A set gives constant-time membership tests; splitext() removes only the
    # last extension, so dots inside the base name are preserved.
    label_stems = {
        os.path.splitext(label)[0] for label in os.listdir(labels_dir)
    }

    copied = 0
    for image in os.listdir(images_dir):
        src = os.path.join(images_dir, image)
        # copyfile() cannot copy directories, so only regular files qualify.
        if not os.path.isfile(src):
            continue
        if os.path.splitext(image)[0] in label_stems:
            shutil.copyfile(src, os.path.join(out_dir, image))
            copied += 1
            print(copied)
    return copied

if __name__ == "__main__":
    # Guarded so that importing this module does not start copying files.
    # Step 1 (enable on the first run): select the annotation files that
    # mention a wanted class.
    # selete_xml_file()
    # Step 2: copy the images that belong to the selected annotations.
    selete_image_file()