1. 程式人生 > >caffe 教程 Fine-tuning a Pretrained Network for Style Recognition下載資料

caffe 教程 Fine-tuning a Pretrained Network for Style Recognition下載資料

問題:執行python examples/finetune_flickr_style/assemble_data.py --workers=1 --images=2000 --seed 831486命令下載Flickr Style資料,然而提示:Writing train/val for 0 successfully downloaded images.,檢視caffe/data/flickr_style/images目錄下並沒有下載到資料集
原因:1)原來的程式碼使用多執行緒、多程序;2)Python2 和Python3的語法不相容
解決:修改caffe/examples/finetune_flickr_style/assemble_data.py 為如下:

"""
Form a subset of the Flickr Style data, download images to dirname, and write
Caffe ImagesDataLayer training file.
"""
import os  
import urllib.request #修改,Python3使用import urllib.request,Python2使用import urllib
import hashlib  
import argparse  
import numpy as np  
import pandas as pd  
from skimage import io
import multiprocessing
import socket

# Flickr returns a special image if the request is unavailable.
MISSING_IMAGE_SHA1 = '6a92790b1c2a301c6e7ddef645dca1f53ea97ac2'

example_dirname = os.path.abspath(os.path.dirname(__file__))
caffe_dirname = os.path.abspath(os.path.join(example_dirname, '../..'))
training_dirname = os.path.join(caffe_dirname, 'data/flickr_style')


# Modified: replaces the original download_image function, which was written
# for Python 2 and for use with a multiprocessing pool.
def mydownload_image(args_tuple):
    """Download one image and report whether a usable file is on disk.

    Args:
        args_tuple: ``(url, filename)`` pair; ``filename`` is the local path
            the image is stored at. An existing file is not downloaded again.

    Returns:
        True if ``filename`` exists after the call and its SHA-1 digest
        differs from ``MISSING_IMAGE_SHA1``; False if the download or the
        check failed for any reason.
    """
    try:
        url, filename = args_tuple
        if not os.path.exists(filename):
            try:
                # Python 3 uses urllib.request; Python 2 used urllib.
                urllib.request.urlretrieve(url, filename)
            except BaseException:
                # A timeout or interrupt can leave a truncated file behind;
                # remove it so that a later run does not mistake it for a
                # finished download.
                if os.path.exists(filename):
                    os.remove(filename)
                raise
        # hashlib needs bytes, so the file has to be opened in binary mode.
        with open(filename, 'rb') as f:
            digest = hashlib.sha1(f.read()).hexdigest()
        return digest != MISSING_IMAGE_SHA1
    except Exception:
        # Best effort: any failure marks this image as unavailable.
        # KeyboardInterrupt is not an Exception subclass, so Ctrl-C still
        # stops the sequential download loop.
        return False


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Download a subset of Flickr Style to a directory')
    parser.add_argument(
        '-s', '--seed', type=int, default=0,
        help="random seed")
    parser.add_argument(
        '-i', '--images', type=int, default=-1,
        help="number of images to use (-1 for all [default])",
    )
    parser.add_argument(
        '-w', '--workers', type=int, default=-1,
        help="num workers used to download images. -x uses (all - x) cores [-1 default]."
    )
    parser.add_argument(
        '-l', '--labels', type=int, default=0,
        help="if set to a positive value, only sample images from the first number of labels."
    )
    args = parser.parse_args()
    np.random.seed(args.seed)

    # Read data, shuffle order, and subsample.
    csv_filename = os.path.join(example_dirname, 'flickr_style.csv.gz')
    df = pd.read_csv(csv_filename, index_col=0, compression='gzip')
    df = df.iloc[np.random.permutation(df.shape[0])]
    if args.labels > 0:
        df = df.loc[df['label'] < args.labels]
    if args.images > 0 and args.images < df.shape[0]:
        df = df.iloc[:args.images]

    # Make directory for images and get local filenames.
    images_dirname = os.path.join(training_dirname, 'images')
    os.makedirs(images_dirname, exist_ok=True)
    df['image_filename'] = [
        os.path.join(images_dirname, _.split('/')[-1]) for _ in df['image_url']
    ]

    # Download images.
    # The --workers option is still parsed and reported, but the original
    # multiprocessing pool is no longer used: images are fetched one at a
    # time by the loop below.
    num_workers = args.workers
    if num_workers <= 0:
        num_workers = multiprocessing.cpu_count() + num_workers
    print('Downloading {} images with {} workers...'.format(
        df.shape[0], num_workers))

    # Give up on a stalled connection after 6 seconds.
    socket.setdefaulttimeout(6)
    results = []
    for item in zip(df['image_url'], df['image_filename']):
        value = mydownload_image(item)
        results.append(value)
        # One progress line per image: '1' on success, 'False' on failure.
        print('1' if value else 'False')

    # Only keep rows with valid images, and write out training file lists.
    print(len(results))
    # An explicit boolean array keeps this a row mask even when it is empty;
    # an empty plain list would be read as "select no columns".
    df = df.loc[np.asarray(results, dtype=bool)]
    for split in ['train', 'test']:
        split_df = df[df['_split'] == split]
        filename = os.path.join(training_dirname, '{}.txt'.format(split))
        split_df[['image_filename', 'label']].to_csv(
            filename, sep=' ', header=None, index=None)
    print('Writing train/val for {} successfully downloaded images.'.format(
        df.shape[0]))

執行python examples/finetune_flickr_style/assemble_data.py --workers=1 --images=2000 --seed 831486命令後,即可成功下載Flickr Style資料;在caffe/data/flickr_style/images目錄下可以看到已下載的資料集。