程式人生 > 影象識別資料集處理——Python 檔案操作

影象識別資料集處理——python 檔案操作

通過 excel 獲取資料集

資料集放在一個總資料夾中,excel中對影象標識做了記錄,我們需要通過讀取excel中的影象檔名來獲取相應的影象。
範例檔案結構如下:

源目錄
 ├── 二級目錄1
 │   ├──example_01.jpg
 │   └──example_02.jpg
 ├── 二級目錄2
 │   ├──example_03.jpg
 │   ├──example_04.jpg
 │   └──example_05.jpg
目標目錄

excel 檔案:img_list.xlsx
(原文此處為 img_list.xlsx 的截圖;下方程式只讀取第一個工作表的第一欄,並以每格內容的最後 14 個字元作為影象檔名。)

# -*- coding: utf-8 -*-
import xlrd
import os
import shutil


def read_excel(excel_path, name_length=14):
    """Return the image-name keys listed in the first column of a workbook.

    Only the first sheet is read.  Every row's first cell is converted to
    ``str`` and truncated to its last ``name_length`` characters, so a cell
    may hold either a bare file name or a longer path ending in that name.

    Args:
        excel_path: Path of the workbook to open with ``xlrd``.
        name_length: Number of trailing characters kept from each cell.
            The default of 14 matches names shaped like ``example_01.jpg``.

    Returns:
        A list with one key per row, in sheet order.
    """
    # NOTE: xlrd 2.0 and later only read the legacy .xls format; opening an
    # .xlsx workbook requires xlrd < 2.0 (or converting the file to .xls).
    workbook = xlrd.open_workbook(excel_path)
    sheet = workbook.sheet_by_index(0)
    return [
        str(sheet.row_values(row)[0])[-name_length:]
        for row in range(sheet.nrows)
    ]


def file_and_forder(original_path):
    """Return the path of every file found anywhere below ``original_path``.

    Args:
        original_path: Root directory that is walked recursively.

    Returns:
        A list of paths, each made of the containing directory joined with
        the file name.  The list is empty when the directory does not exist
        or holds no files.
    """
    folder_filename_list = []
    for root_dir, _dir_names, filenames in os.walk(original_path):
        folder_filename_list.extend(
            os.path.join(root_dir, filename) for filename in filenames
        )
    return folder_filename_list


def copy_img_move(original_path, Target_path, excel_path, name_length=14):
    """Copy the images named in a workbook from a source tree to a folder.

    A file is copied when the last ``name_length`` characters of its path
    equal one of the keys returned by ``read_excel``.

    Args:
        original_path: Root directory searched recursively for images.
        Target_path: Directory receiving the copies; created when missing.
        excel_path: Workbook whose first column lists the wanted images.
        name_length: Number of trailing characters compared (default 14).
    """
    # A set gives constant-time membership tests for every scanned file.
    wanted = set(read_excel(excel_path, name_length))
    folder_filename_list = file_and_forder(original_path)

    # Without an existing directory shutil.copy would write every image to a
    # single file called Target_path, each copy overwriting the previous one.
    os.makedirs(Target_path, exist_ok=True)

    for filename_single in folder_filename_list:
        print("filename_single", filename_single)
        if filename_single[-name_length:] in wanted:
            shutil.copy(filename_single, Target_path)
    print("處理完成!")


if __name__ == '__main__':
    copy_img_move("./源目錄", "./目標目錄", "img_list.xlsx")

通過 json 獲取資料集

import json
import shutil
import os
from glob import glob
from tqdm import tqdm

# Sort the annotated images into one folder per class.
#
# 59 output folders (0-58) are created under ./data/train/.  Each folder is
# created on its own with exist_ok=True, so a folder that already exists no
# longer stops the remaining ones from being created, and a missing
# ./data/train parent is created instead of being silently ignored.
for i in range(0, 59):
    os.makedirs("./data/train/" + str(i), exist_ok=True)

# Load both annotation lists; the context managers close the files promptly.
with open("./data/labels/train_annotations.json", "r", encoding="utf-8") as f:
    file_train = json.load(f)
with open("./data/labels/validation_annotations.json", "r", encoding="utf-8") as f:
    file_val = json.load(f)

# Training and validation records are handled identically from here on.
file_list = file_train + file_val

for file in tqdm(file_list):
    filename = file["image_id"]
    origin_path = "./data/images/" + filename
    ids = file["disease_class"]
    # Classes 44 and 45 are excluded from the output altogether.
    if ids in (44, 45):
        continue
    # Shift the labels above the two removed classes down so the folder
    # numbers stay contiguous (source labels 46-60 land in folders 44-58).
    if ids > 45:
        ids = ids - 2
    save_path = "./data/train/" + str(ids) + "/"
    shutil.copy(origin_path, save_path)