1. 程式人生 > python解析並修改xml檔案

python解析並修改xml檔案

使用 labelImg 標註圖片後,需要統一修改 xml 標註檔中的 label 名稱和圖片檔名,所以用 python 批量重新命名圖片並修改對應的 xml 檔案。
首先批量複製並修改圖片名稱:

# -*- coding: utf-8 -*-
# 將二級目錄下的圖片/標籤檔案重新命名到同一個新資料夾下

import os, shutil

def batchRenameFile(srcDirName, destDirName, speciesId='1', extension='.jpg'):
    """Copy the files stored one level below ``srcDirName`` into ``destDirName``.

    Every copy is named ``<speciesId>_<subDirName>_<i><extension>``, where
    ``i`` counts from 1 inside each sub-folder. The path of each copy is
    printed after it has been written.

    Args:
        srcDirName: Folder whose immediate sub-folders hold the data files.
        destDirName: Folder that receives the renamed copies. It is created
            when it does not exist yet.
        speciesId: Prefix placed in front of every new file name. Defaults to
            ``'1'``, the value that was previously hard-coded.
        extension: Suffix appended to every new file name. Defaults to
            ``'.jpg'``, the value that was previously hard-coded.
    """
    if not os.path.isdir(destDirName):
        os.makedirs(destDirName)

    # Sorted so that the numbering is reproducible; os.listdir() makes no
    # promise about the order of the names it returns.
    for subDirName in sorted(os.listdir(srcDirName)):
        subDirPath = os.path.join(srcDirName, subDirName)
        if not os.path.isdir(subDirPath):
            # A stray file next to the sub-folders cannot be listed.
            continue

        fileNames = [
            entry for entry in sorted(os.listdir(subDirPath))
            if os.path.isfile(os.path.join(subDirPath, entry))
        ]
        for index, fileName in enumerate(fileNames, 1):
            newName = str(speciesId) + '_' + subDirName + '_' + str(index) + extension
            target = os.path.join(destDirName, newName)
            shutil.copy(os.path.join(subDirPath, fileName), target)
            print(target)

接著從txt檔案中生成數字和標籤對應的字典:

def _readLabelTable(listFile, keyOffset=None):
    """Read one tab-separated list file into a ``{key: label}`` dict.

    The first tab-separated field of a line is the key and the last field is
    the label. When ``keyOffset`` is given, the key is parsed as an integer,
    shifted by that amount and stored as a string again.
    """
    table = {}
    with open(listFile, 'r') as handle:
        for line in handle:
            # Drop a UTF-8 byte-order mark that shows up as its three raw bytes.
            # NOTE(review): this only matches when the file is read as bytes or
            # through a single-byte codec - confirm the list files' encoding.
            line = line.replace('\xef\xbb\xbf', '')
            if not line.strip():
                # Blank lines carry no entry (and would break int() below).
                continue
            fields = line.rstrip('\r\n').split('\t')
            key = fields[0]
            if keyOffset is not None:
                key = str(int(key) + keyOffset)
            table[key] = fields[-1]
    return table


def creatDic(catListFile=r'E:\Cats&Dogs\CatList.txt',
             dogListFile=r'E:\Cats&Dogs\DogList.txt',
             dogKeyOffset=42):
    """Build the number-to-label dictionary from the cat and dog list files.

    Cat entries keep the key exactly as written in the file. Dog entries are
    stored under ``str(int(key) + dogKeyOffset)`` so that they do not collide
    with the cat keys. Labels never carry a trailing line break.

    Args:
        catListFile: Path of the tab-separated cat list.
        dogListFile: Path of the tab-separated dog list.
        dogKeyOffset: Amount added to every dog key (42 by default).

    Returns:
        A dict mapping key strings to label strings.

    Raises:
        ValueError: If a dog key is not an integer.
    """
    txtDict = _readLabelTable(catListFile)
    txtDict.update(_readLabelTable(dogListFile, dogKeyOffset))
    return txtDict

最後批量修改xml檔案中對圖片打的標籤名稱和與xml對應的圖片名稱:

def batchRenameFile1(DirName, txtDict,
                     imageDir='E:\\myVOCdevkit\\VOC2007\\images\\',
                     dogKeyOffset=42):
    """Rewrite the image name, path and labels of every xml file in ``DirName``.

    An xml file is expected to be named ``<species>_<label>_<n>.xml``. For each
    file the function
      * sets ``<filename>`` to the xml file's base name plus ``.jpg``,
      * sets ``<path>`` to ``imageDir`` followed by that image name,
      * for species ``"1"`` (cat) or ``"2"`` (dog) sets ``<folder>`` and the
        ``<name>`` of every ``<object>`` to the label looked up in ``txtDict``,
    and then writes the document back over the original file.

    Args:
        DirName: Folder that holds the xml files.
        txtDict: Mapping from key string to label name. Cats are looked up by
            the label part of the file name, dogs by that number plus
            ``dogKeyOffset``.
        imageDir: Prefix written in front of the image name in ``<path>``.
        dogKeyOffset: Amount added to a dog's label number before the lookup.

    Raises:
        KeyError: If the computed key is missing from ``txtDict``.
        IndexError: If an xml file name contains no ``_``.
    """
    # Imported here because the file's import line does not provide parse().
    from xml.etree.ElementTree import parse

    for FileName in os.listdir(DirName):
        if not FileName.lower().endswith('.xml'):
            # Anything else in the folder cannot be parsed as an annotation.
            continue

        FilePath = os.path.join(DirName, FileName)
        print(FilePath)
        doc = parse(FilePath)
        root = doc.getroot()

        name = FileName.split(".")[0] + ".jpg"
        root.find("filename").text = name
        root.find("path").text = imageDir + name

        nameParts = FileName.split("_")
        species = nameParts[0]
        label = nameParts[1]
        if species == "1":                 # cat
            key = label
        elif species == "2":               # dog
            key = str(int(label) + dogKeyOffset)
        else:
            # Unknown species: only filename/path are updated.
            key = None

        if key is not None:
            labelName = txtDict[key]
            root.find("folder").text = labelName
            # A file can contain more than one <object> node.
            for objectNode in root.findall("object"):
                objectNode.find("name").text = labelName

        doc.write(FilePath)