1. 程式人生 > >【python】小目標檢測中對一幅高解析度圖分塊且改變目標bounding box的座標

【python】小目標檢測中對一幅高解析度圖分塊且改變目標bounding box的座標

很多時候,在小目標檢測中,對於一副高解析度影象,我們很難直接輸入一整幅大圖來進行目標檢測,特別是對於one-stage的方法,如SSD的輸入尺寸是300和512, YOLO的輸入尺寸是416,而高解析度影象通常有幾千×幾千畫素。所以我在此分享將一副高解析度影象分塊同時寫入對應目標的boundingbox改變後的座標到xml檔案中。

import torch
import pickle
import sys
import os
import cv2
import numpy as np
import os.path
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image
from xml.dom.minidom import Document
from tqdm import tqdm
if sys.version_info[0] ==2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET
origin_dir = '原影象存放地址'
target_dir1 = '分塊影象存放地址'
annota_dir = '原boundingbox的xml檔案存放地址'
target_dir2 = '分塊boundingbox的xml檔案存放地址'
def clip_img(No, oriname):
    from_name = os.path.join(origin_dir, oriname+'.jpg')
    img = cv2.imread(from_name)
    h_ori,w_ori, _ =img.shape#儲存原圖的大小
    img = cv2.resize(img, (2048, 2048))#可以resize也可以不resize,看情況而定
    h, w, _ = img.shape
    xml_name = os.path.join(annota_dir, oriname+'.xml')#讀取每個原影象的xml檔案
    xml_ori = ET.parse(xml_name).getroot()
    res = np.empty((0,5))#存放座標的四個值和類別
    for obj in xml_ori.iter('object'):
        difficult = int(obj.find('difficult').text) == 1
        if difficult:
            continue
        name = obj.find('name').text.lower().strip()
        bbox = obj.find('bndbox')
        pts = ['xmin', 'ymin', 'xmax', 'ymax']
        bndbox = []
        for i, pt in enumerate(pts):
            cur_pt = int(bbox.find(pt).text) - 1
            cur_pt = int(cur_pt*h/h_ori) if i%2==1 else int(cur_pt * w / w_ori)
            bndbox.append(cur_pt)
        #label_idx = self.class_to_ind[name]
        bndbox.append(name)
        res = np.vstack((res, bndbox))
    i = 0
    win_size = 256#分塊的大小
    stride = 128#重疊的大小,設定這個可以使分塊有重疊
    for r in range(0, h - win_size, stride):
        for c in range(0, w - win_size, stride):
            flag = np.zeros([1,10])
            youwu = False
            xiefou = True
            tmp = img[r: r+win_size, c: c+win_size]
            for re in range(res.shape[0]):
                xmin,ymin,xmax,ymax,label = res[re]
                if int(xmin)>=c and int(xmax) <=c+win_size and int(ymin)>=r and int(ymax)<=r+win_size:
                    flag[0][re] = 1
                    youwu = True
                elif int(xmin)<c or int(xmax) >c+win_size or int(ymin) < r or int(ymax) > r+win_size:
                    pass
                else:
                    xiefou = False
                    break;
            if xiefou:#如果物體被分割了,則忽略不寫入
                if youwu:#有物體則寫入xml檔案
                    doc = Document()
                    annotation = doc.createElement('annotation')
                    doc.appendChild(annotation)
                    for re in range(res.shape[0]):
                        xmin,ymin,xmax,ymax,label = res[re]
                        xmin=int(xmin)
                        ymin=int(ymin)
                        xmax=int(xmax)
                        ymax=int(ymax)
                        if flag[0][re] == 1:
                            xmin=str(xmin-c)
                            ymin=str(ymin-r)
                            xmax=str(xmax-c)
                            ymax=str(ymax-r)
                            object_charu = doc.createElement('object')
                            annotation.appendChild(object_charu)
                            name_charu = doc.createElement('name')
                            name_charu_text = doc.createTextNode(label)
                            name_charu.appendChild(name_charu_text)
                            object_charu.appendChild(name_charu)
                            dif = doc.createElement('difficult')
                            dif_text = doc.createTextNode('0')
                            dif.appendChild(dif_text)
                            object_charu.appendChild(dif)
                            bndbox = doc.createElement('bndbox')
                            object_charu.appendChild(bndbox)
                            xmin1 = doc.createElement('xmin')
                            xmin_text = doc.createTextNode(xmin)
                            xmin1.appendChild(xmin_text)
                            bndbox.appendChild(xmin1)
                            ymin1 = doc.createElement('ymin')
                            ymin_text = doc.createTextNode(ymin)
                            ymin1.appendChild(ymin_text)
                            bndbox.appendChild(ymin1)
                            xmax1 = doc.createElement('xmax')
                            xmax_text = doc.createTextNode(xmax)
                            xmax1.appendChild(xmax_text)
                            bndbox.appendChild(xmax1)
                            ymax1 = doc.createElement('ymax')
                            ymax_text = doc.createTextNode(ymax)
                            ymax1.appendChild(ymax_text)
                            bndbox.appendChild(ymax1)
                        else:
                            continue
                    xml_name = oriname+'_%3d.xml' % (i)
                    to_xml_name = os.path.join(target_dir2, xml_name)
                    with open(to_xml_name, 'wb+') as f:
                        f.write(doc.toprettyxml(indent="\t", encoding='utf-8'))
                    #name = '%02d_%02d_%02d_.bmp' % (No, int(r/win_size), int(c/win_size))
                    img_name = oriname+'_%3d.jpg' %(i)
                    to_name = os.path.join(target_dir1, img_name)
                    i = i+1
                    cv2.imwrite(to_name, tmp)
for No, name in tqdm(enumerate(os.listdir(origin_dir))):
    clip_img(No, name.rstrip('.jpg'))

這樣就將一個大圖分塊且儲存了座標xml檔案。
注意xml檔案的key視情況而定,不是通用的。