影象識別資料集處理——python 檔案操作
阿新 • • 發佈:2018-11-16
通過 excel 獲取資料集
資料集放在一個總資料夾中,excel中對影象標識做了記錄,我們需要通過讀取excel中的影象檔名來獲取相應的影象。
例項檔案結構如下:
源目錄
├── 二級目錄1
│ ├──example_01.jpg
│ └──example_02.jpg
└── 二級目錄2
    ├──example_03.jpg
    ├──example_04.jpg
    └──example_05.jpg
目標目錄
excel 檔案:img_list.xlsx
# -*- coding: utf-8 -*-
import xlrd
import os
import shutil
def read_excel(excel_path):
    """Return the image-name suffixes listed in the first column of a workbook.

    Opens ``excel_path`` with ``xlrd``, reads the first sheet, and for every
    row converts the first cell to ``str`` and keeps its last 14 characters
    (shorter values are kept whole).

    NOTE(review): xlrd 2.0+ reportedly reads only legacy ``.xls`` files; if
    the workbook is ``.xlsx`` an older xlrd may be required -- confirm against
    the installed version.

    Args:
        excel_path: Path of the workbook to read.

    Returns:
        A list with one string per row of the first sheet, in row order.
    """
    first_sheet = xlrd.open_workbook(excel_path).sheet_by_index(0)
    return [
        str(first_sheet.row_values(row_index)[0])[-14:]
        for row_index in range(first_sheet.nrows)
    ]
def file_and_forder(original_path):
    """Collect the path of every file found anywhere under ``original_path``.

    The tree is traversed with ``os.walk``; each file name is joined to the
    directory it was found in with ``os.path.join`` so that a root given with
    a trailing separator does not produce a doubled separator in the result.

    Args:
        original_path: Root directory to search recursively.

    Returns:
        A list of file paths (each prefixed by the directory as reported by
        ``os.walk``). The list is empty when the directory does not exist or
        contains no files.
    """
    return [
        os.path.join(root_dir, filename)
        for root_dir, _sub_dirs, filenames in os.walk(original_path)
        for filename in filenames
    ]
def copy_img_move(original_path, Target_path, excel_path):
    """Copy every image listed in the workbook from the source tree to a folder.

    The names returned by ``read_excel(excel_path)`` are compared against each
    file found by ``file_and_forder(original_path)``. A file is copied when
    either the last 14 characters of its path or its base name appears in the
    workbook list; the base-name check lets file names shorter than 14
    characters match, which the 14-character tail alone cannot do because the
    tail then includes part of the directory.

    Args:
        original_path: Root directory that is searched recursively for images.
        Target_path: Directory that receives the copies; created if missing.
        excel_path: Workbook whose first column lists the wanted images.
    """
    # A set makes each membership test O(1) instead of scanning the list.
    wanted_names = set(read_excel(excel_path))

    # shutil.copy() treats a non-existent destination as a file name, so
    # without the directory every copy would overwrite the same single file.
    os.makedirs(Target_path, exist_ok=True)

    for filename_single in file_and_forder(original_path):
        print("filename_single", filename_single)
        if (
            filename_single[-14:] in wanted_names
            or os.path.basename(filename_single) in wanted_names
        ):
            shutil.copy(filename_single, Target_path)
    print("處理完成!")
if __name__ == "__main__":
    # Script entry point: copy the images listed in img_list.xlsx from the
    # source tree into the target folder.
    copy_img_move(
        original_path="./源目錄",
        Target_path="./目標目錄",
        excel_path="img_list.xlsx",
    )
通過 json 獲取資料集
import json
import shutil
import os
from glob import glob
from tqdm import tqdm
# Create the 59 class folders (0..58) up front; the images are copied into
# them below according to their remapped class id.
for i in range(0, 59):
    # exist_ok keeps an already-existing folder from aborting the creation of
    # the remaining ones (and also creates ./data/train itself if missing).
    os.makedirs("./data/train/" + str(i), exist_ok=True)

# Load both annotation lists; "with" closes the files once they are parsed.
with open("./data/labels/train_annotations.json", "r", encoding="utf-8") as f:
    file_train = json.load(f)
with open("./data/labels/validation_annotations.json", "r", encoding="utf-8") as f:
    file_val = json.load(f)
file_list = file_train + file_val

for file in tqdm(file_list):
    filename = file["image_id"]
    origin_path = "./data/images/" + filename
    ids = file["disease_class"]
    # Classes 44 and 45 are skipped entirely.
    if ids in (44, 45):
        continue
    # Shift the classes above the two skipped ones down so that the folder
    # numbering stays contiguous.
    if ids > 45:
        ids -= 2
    save_path = "./data/train/" + str(ids) + "/"
    shutil.copy(origin_path, save_path)