1. 程式人生 > >(ILSVRC2012)imagenet2012資料集下載與處理

(ILSVRC2012)imagenet2012資料集下載與處理

1.訓練集的資料量已經足夠了,所以我只下載了訓練集,下載地址:

http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_train.tar

2.資料集中的圖片類別是用 WordNet 編碼進行命名的,WordNet 編碼與實際語義的對映,可參考以下教程:

https://www.quora.com/Where-can-I-find-the-semantic-labels-for-the-1000-ImageNet-ILSVRC2012-classes-codes

https://blog.csdn.net/l_ml_m_lm_m/article/details/80338486
3.對下載好的資料集圖片進行縮小(縮放為 84×84),並按類別隨機劃分訓練/驗證/測試集(比例約為 64%/16%/20%):

import glob
import os
import random
import shutil
import tarfile

from PIL import Image

# Directory that receives the extracted, resized and split dataset.
uncompress_path = 'imagenet2012'

# Split directory names paired with the label used in the progress message.
# The labels are kept verbatim (including the 'trian' spelling) so the
# script's printed output is unchanged.
SPLITS = (
    ('train', 'trian_directory'),
    ('val', 'val_directory'),
    ('test', 'test_directory'),
)


def class_name_from_tar(tar_file):
    """Return the class name encoded in a per-class tar file name.

    Only the final extension is removed, so names that contain additional
    dots (which made a two-way ``split('.')`` unpack fail) are accepted.
    """
    return os.path.splitext(os.path.basename(tar_file))[0]


def split_classes(class_dirs, train_ratio=0.64, val_ratio=0.16, rng=None):
    """Randomly partition class directories into train/val/test groups.

    Args:
        class_dirs: sequence of class directory paths.
        train_ratio: fraction of classes assigned to the training split.
        val_ratio: fraction of classes assigned to the validation split.
        rng: optional ``random.Random`` instance for reproducible splits;
            the module-level generator is used when omitted.

    Returns:
        A dict with the keys ``'train'``, ``'val'`` and ``'test'``. The
        train and val sizes are ``int(len(class_dirs) * ratio)``; every
        remaining class goes to test. The groups are disjoint.
    """
    picker = random if rng is None else rng
    total = len(class_dirs)
    train_count = int(total * train_ratio)
    val_end = train_count + int(total * val_ratio)
    # A single shuffle followed by slicing guarantees disjoint groups.
    shuffled = picker.sample(list(class_dirs), total)
    return {
        'train': shuffled[:train_count],
        'val': shuffled[train_count:val_end],
        'test': shuffled[val_end:],
    }


def extract_class_archive(tar_file, output_dir):
    """Extract ``tar_file`` into ``output_dir/<class name>`` and return it."""
    class_dir = os.path.join(output_dir, class_name_from_tar(tar_file))
    os.makedirs(class_dir, exist_ok=True)
    with tarfile.open(tar_file) as archive:
        if hasattr(tarfile, 'data_filter'):
            # Extraction filters reject members that would escape class_dir.
            archive.extractall(class_dir, filter='data')
        else:
            archive.extractall(class_dir)
    return class_dir


def resize_images(class_dir, size=(84, 84)):
    """Resize every file in ``class_dir`` to ``size``, overwriting in place."""
    pattern = os.path.join(glob.escape(class_dir), '*')
    for image_file in glob.glob(pattern):
        # Close the source handle before overwriting the same path.
        with Image.open(image_file) as source:
            resized = source.resize(size, resample=Image.LANCZOS)
        # TODO: rename image_file (carried over from the original script).
        resized.save(image_file)


def main():
    """Extract all ``*.tar`` files in the CWD, shrink, split and zip them."""
    os.makedirs(uncompress_path, exist_ok=True)

    for tar_file in sorted(glob.glob('*.tar')):
        print('uncompress ' + tar_file + ' ...')
        resize_images(extract_class_archive(tar_file, uncompress_path))

    # Skip split directories left over from a previous run so they are not
    # mistaken for classes and moved into one another.
    split_names = {name for name, _ in SPLITS}
    class_dirs = [
        path
        for path in sorted(glob.glob(os.path.join(uncompress_path, '*')))
        if os.path.basename(path) not in split_names
    ]
    assignment = split_classes(class_dirs)

    for split_name, label in SPLITS:
        split_dir = os.path.join(uncompress_path, split_name)
        os.makedirs(split_dir, exist_ok=True)
        for class_dir in assignment[split_name]:
            print('mv ' + class_dir + ' to ' + label + '...')
            shutil.move(class_dir, split_dir)

    print('compress result...')
    # Writes '<uncompress_path>.zip' with entries rooted at uncompress_path.
    shutil.make_archive(uncompress_path, 'zip', base_dir=uncompress_path)

    print('proc success!!!')


if __name__ == '__main__':
    main()