【python】小目標檢測中對一幅高解析度圖分塊且改變目標bounding box的座標
阿新 • • 發佈:2018-12-21
很多時候,在小目標檢測中,對於一幅高解析度影象,我們很難直接把整幅大圖輸入網路來進行目標檢測,特別是對於one-stage的方法,如SSD的輸入尺寸是300和512, YOLO的輸入尺寸是416,而高解析度影象通常有幾千×幾千畫素。所以我在此分享一段程式碼:將一幅高解析度影象分塊,同時把每個分塊中目標的bounding box換算成相對於該分塊的座標,並寫入對應的xml檔案中。
import torch
import pickle
import sys
import os
import cv2
import numpy as np
import os.path
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image
from xml.dom.minidom import Document
from tqdm import tqdm

if sys.version_info[0] == 2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET

# Placeholder paths: replace each one with a real directory before running.
origin_dir = '原影象存放地址'  # directory holding the source .jpg images
target_dir1 = '分塊影象存放地址'  # directory receiving the image tiles
annota_dir = '原boundingbox的xml檔案存放地址'  # directory holding the source XMLs
target_dir2 = '分塊boundingbox的xml檔案存放地址'  # directory receiving tile XMLs

_BOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')


def _read_boxes(xml_path, w_ori, h_ori, w, h):
    """Parse one annotation XML into a list of rescaled boxes.

    Objects whose <difficult> flag is 1 are dropped. Coordinates are shifted
    by -1 and rescaled from the original image size (w_ori, h_ori) to the
    working image size (w, h).

    Returns a list of (xmin, ymin, xmax, ymax, label) tuples with int
    coordinates and a lower-cased, stripped label.
    """
    boxes = []
    for obj in ET.parse(xml_path).getroot().iter('object'):
        difficult = obj.find('difficult')
        # A missing <difficult> tag is treated as "not difficult".
        if difficult is not None and int(difficult.text) == 1:
            continue
        label = obj.find('name').text.lower().strip()
        bbox = obj.find('bndbox')
        coords = []
        for idx, tag in enumerate(_BOX_TAGS):
            # int(float(...)) also accepts coordinates stored as "12.0".
            value = int(float(bbox.find(tag).text)) - 1
            if idx % 2 == 1:  # ymin / ymax scale with the height
                coords.append(int(value * h / h_ori))
            else:  # xmin / xmax scale with the width
                coords.append(int(value * w / w_ori))
        boxes.append((coords[0], coords[1], coords[2], coords[3], label))
    return boxes


def _boxes_in_tile(boxes, c, r, win_size):
    """Collect the boxes lying fully inside the tile at column c, row r.

    Returns the contained boxes translated to tile-local coordinates, or
    None when at least one box straddles the tile border (the tile would
    show only part of that object).
    """
    right = c + win_size
    bottom = r + win_size
    inside = []
    for xmin, ymin, xmax, ymax, label in boxes:
        if xmin >= c and xmax <= right and ymin >= r and ymax <= bottom:
            inside.append((xmin - c, ymin - r, xmax - c, ymax - r, label))
        elif xmax <= c or xmin >= right or ymax <= r or ymin >= bottom:
            continue  # box is entirely outside this tile
        else:
            return None  # box is cut by the tile border
    return inside


def _append_text(doc, parent, tag, text):
    """Append <tag>text</tag> under parent."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(text))
    parent.appendChild(node)


def _build_annotation(boxes):
    """Build an <annotation> document with one <object> per box."""
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)
    for xmin, ymin, xmax, ymax, label in boxes:
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text(doc, obj, 'name', label)
        _append_text(doc, obj, 'difficult', '0')
        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        for tag, value in zip(_BOX_TAGS, (xmin, ymin, xmax, ymax)):
            _append_text(doc, bndbox, tag, str(value))
    return doc


def clip_img(No, oriname, win_size=256, stride=128, resize_to=(2048, 2048)):
    """Cut one image into overlapping tiles and write per-tile annotations.

    Reads ``origin_dir/<oriname>.jpg`` and ``annota_dir/<oriname>.xml``,
    optionally resizes the image, then slides a square window over it. For
    every kept tile the image crop is written to ``target_dir1``; when the
    tile fully contains at least one object, an XML file with the tile-local
    box coordinates is written to ``target_dir2`` under the same index.

    Tiles whose border cuts through an object are skipped entirely.

    Args:
        No: running index of the image; not used by this function.
        oriname: file name of the image without the ``.jpg`` extension.
        win_size: side length of the square tile in pixels.
        stride: step between consecutive tiles; a value smaller than
            ``win_size`` makes neighbouring tiles overlap.
        resize_to: ``(width, height)`` passed to ``cv2.resize`` before
            tiling, or ``None`` to tile the image at its original size.

    Raises:
        IOError: if the image cannot be read.
    """
    from_name = os.path.join(origin_dir, oriname + '.jpg')
    img = cv2.imread(from_name)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('cannot read image: %s' % from_name)
    h_ori, w_ori = img.shape[:2]  # remember the original size for rescaling
    if resize_to is not None:
        img = cv2.resize(img, resize_to)
    h, w = img.shape[:2]

    xml_name = os.path.join(annota_dir, oriname + '.xml')
    boxes = _read_boxes(xml_name, w_ori, h_ori, w, h)

    i = 0  # index of the next written tile, shared by image and XML names
    # "+ 1" keeps the last window that ends exactly on the image border.
    for r in range(0, h - win_size + 1, stride):
        for c in range(0, w - win_size + 1, stride):
            tile_boxes = _boxes_in_tile(boxes, c, r, win_size)
            if tile_boxes is None:
                continue  # an object is split by this tile: do not write it
            if tile_boxes:
                doc = _build_annotation(tile_boxes)
                to_xml_name = os.path.join(target_dir2,
                                           oriname + '_%03d.xml' % i)
                with open(to_xml_name, 'wb') as f:
                    f.write(doc.toprettyxml(indent="\t", encoding='utf-8'))
            # NOTE(review): the original layout was lost, so it is unclear
            # whether object-free tiles were meant to be saved; here every
            # kept tile is written and only the XML depends on objects.
            to_name = os.path.join(target_dir1, oriname + '_%03d.jpg' % i)
            cv2.imwrite(to_name, img[r: r + win_size, c: c + win_size])
            i = i + 1


# Process every .jpg in origin_dir. splitext removes the extension exactly;
# str.rstrip('.jpg') would also eat trailing '.', 'j', 'p', 'g' characters
# of the base name.
jpg_names = [name for name in os.listdir(origin_dir) if name.endswith('.jpg')]
for No, name in enumerate(tqdm(jpg_names)):
    clip_img(No, os.path.splitext(name)[0])
這樣就將一個大圖分塊且儲存了座標xml檔案。
注意:xml檔案中的標籤名稱(key,例如上面程式碼裡的object、name、difficult、bndbox)要視你自己的標註格式而定,這裡的寫法不是通用的。