【資料集】在批量xml標籤中選出指定標籤和對應圖片
阿新 • • 發佈:2019-01-14
1.博主從朋友那裡拿了一個VOC資料集,裡面有20個類,博主只要6個
classes={"one","two","three","four","five","fist"}
2.先選出含有指定標籤的xml檔案,再依檔名(不含副檔名)把對應的圖片也複製出來
# coding=utf-8
"""Filter a VOC-style dataset down to a subset of label classes.

Two steps, meant to be run in order:

1. ``selete_xml_file`` copies every annotation XML that mentions at least one
   wanted class into a separate label directory.
2. ``selete_image_file`` copies every image whose base name matches one of the
   selected annotation files into a separate image directory.

The module-level path constants are the defaults used when the functions are
called without arguments.
"""
import os
import os.path
import shutil
import sys
import xml.dom.minidom

# Default locations (original author's machine); override via function arguments.
path = "C:\\Users\\AS\\Desktop\\new\\Annotations"
newpath = "C:\\Users\\AS\\Desktop\\new\\label"
label_path = "C:\\Users\\AS\\Desktop\\new\\label"
image_path = "C:\\Users\\AS\\Desktop\\new\\JPEGImages"
image_new_path = "C:\\Users\\AS\\Desktop\\new\\image"

# Label names to keep; an annotation is selected if any <name> matches one.
classes = {"one", "two", "three", "four", "five", "fist"}


def _ensure_dir(directory):
    """Create *directory* (and parents) if it does not exist yet."""
    if not os.path.isdir(directory):
        os.makedirs(directory)


def _stem(file_name):
    """Return *file_name* without its final extension ("a.b.jpg" -> "a.b")."""
    return os.path.splitext(file_name)[0]


def _has_wanted_label(xml_path, wanted):
    """Return True if any <name> element in *xml_path* holds a label in *wanted*."""
    root = xml.dom.minidom.parse(xml_path).documentElement
    for node in root.getElementsByTagName("name"):
        # firstChild is None for an empty <name/>; element children have no .data.
        label = getattr(node.firstChild, "data", None)
        if label is not None and label.strip() in wanted:
            return True
    return False


def selete_xml_file(src_dir=None, dst_dir=None, wanted=None):
    """Copy annotation XML files that contain at least one wanted label.

    Args:
        src_dir: Directory holding the annotation files. Defaults to ``path``.
        dst_dir: Directory the selected files are copied into; created when
            missing. Defaults to ``newpath``.
        wanted: Collection of label names to keep. Defaults to ``classes``.

    Returns:
        The number of files copied.

    Raises:
        OSError: If ``src_dir`` cannot be listed or a copy fails.
        xml.parsers.expat.ExpatError: If an ``.xml`` file is not well-formed.
    """
    # Resolve defaults at call time so reassigned module constants are honoured.
    src_dir = path if src_dir is None else src_dir
    dst_dir = newpath if dst_dir is None else dst_dir
    wanted = classes if wanted is None else wanted

    _ensure_dir(dst_dir)
    copied = 0
    for file_name in sorted(os.listdir(src_dir)):
        src = os.path.join(src_dir, file_name)
        # Check the joined path: the bare name would be resolved against the
        # current working directory and never identify sub-directories.
        if not os.path.isfile(src) or not file_name.lower().endswith(".xml"):
            continue
        if _has_wanted_label(src, wanted):
            # One copy per file, no matter how many <name> elements match.
            shutil.copyfile(src, os.path.join(dst_dir, file_name))
            copied += 1
            print(copied)
    return copied


def selete_image_file(label_dir=None, image_dir=None, out_dir=None):
    """Copy the images whose base name matches a selected annotation file.

    Args:
        label_dir: Directory holding the selected annotation files. Defaults
            to ``label_path``.
        image_dir: Directory holding all images. Defaults to ``image_path``.
        out_dir: Directory the matching images are copied into; created when
            missing. Defaults to ``image_new_path``.

    Returns:
        The number of images copied.

    Raises:
        OSError: If a directory cannot be listed or a copy fails.
    """
    label_dir = label_path if label_dir is None else label_dir
    image_dir = image_path if image_dir is None else image_dir
    out_dir = image_new_path if out_dir is None else out_dir

    # A set gives O(1) membership tests; only the final extension is removed
    # so that "c.v2.xml" pairs with "c.v2.jpg" and not with "c.v3.jpg".
    wanted_stems = {_stem(entry) for entry in os.listdir(label_dir)}

    _ensure_dir(out_dir)
    copied = 0
    for file_name in sorted(os.listdir(image_dir)):
        src = os.path.join(image_dir, file_name)
        if not os.path.isfile(src) or _stem(file_name) not in wanted_stems:
            continue
        dst = os.path.join(out_dir, file_name)
        print(src)
        print(dst)
        shutil.copyfile(src, dst)
        copied += 1
    print(copied)
    return copied


if __name__ == "__main__":
    # Step 1 has to be run once beforehand so the label directory is populated:
    # selete_xml_file()
    selete_image_file()