faster-rcnn 之訓練指令碼解析:./tools/train_faster_rcnn_alt_opt.py
阿新 • • 發佈:2019-01-09
【說明】:歡迎加入:faster-rcnn 交流群 238138700,本文分析faster-rcnn 訓練的python指令碼;
【debug】:我是把__main__修改了下,放在一個自己定義的函式裡面,然後呼叫debug一步步看執行效果的,讀者不妨也這樣做,可以清晰看到程式是如何執行的;
#!/usr/bin/env python # -------------------------------------------------------- # Faster R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- """Train a Faster R-CNN network using alternating optimization. This tool implements the alternating optimization algorithm described in our NIPS 2015 paper ("Faster R-CNN: Towards Real-time Object Detection with Region Proposal Networks." Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun.) """ import _init_paths from fast_rcnn.train import get_training_roidb, train_net from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir from datasets.factory import get_imdb from rpn.generate import imdb_proposals import argparse import pprint import numpy as np import sys, os import multiprocessing as mp import cPickle import shutil def parse_args(): """ Parse input arguments """ parser = argparse.ArgumentParser(description='Train a Faster R-CNN network') parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=0, type=int) parser.add_argument('--net_name', dest='net_name', help='network name (e.g., "ZF")', default='ZF', type=str) #qyy parser.add_argument('--weights', dest='pretrained_model', help='initialize with pretrained model weights', default='./data/imagenet_models/ZF.v2.caffemodel', type=str) #qyy parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default='./experiments/cfgs/faster_rcnn_alt_opt.yml', type=str)# qyy parser.add_argument('--imdb', dest='imdb_name', help='dataset to train on', default='voc_2007_trainval', type=str) parser.add_argument('--set', dest='set_cfgs', help='set config keys', default=None, nargs=argparse.REMAINDER) if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() return args def get_roidb(imdb_name, rpn_file=None): imdb = get_imdb(imdb_name) print 'Loaded dataset `{:s}` for training'.format(imdb.name) imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) if rpn_file is not None: imdb.config['rpn_file'] = rpn_file roidb = get_training_roidb(imdb) return roidb, imdb def get_solvers(net_name): # Faster R-CNN Alternating Optimization n = 'faster_rcnn_alt_opt' # Solver for each training stage solvers = [[net_name, n, 'stage1_rpn_solver60k80k.pt'], [net_name, n, 'stage1_fast_rcnn_solver30k40k.pt'], [net_name, n, 'stage2_rpn_solver60k80k.pt'], [net_name, n, 'stage2_fast_rcnn_solver30k40k.pt']] solvers = [os.path.join(cfg.MODELS_DIR, *s) for s in solvers] # Iterations for each training stage max_iters = [80000, 40000, 80000, 40000] # max_iters = [100, 100, 100, 100] # Test prototxt for the RPN rpn_test_prototxt = os.path.join( cfg.MODELS_DIR, net_name, n, 'rpn_test.pt') return solvers, max_iters, rpn_test_prototxt # ------------------------------------------------------------------------------ # Pycaffe doesn't reliably free GPU memory when instantiated nets are discarded # (e.g. "del net" in Python code). To work around this issue, each training # stage is executed in a separate process using multiprocessing.Process. # ------------------------------------------------------------------------------ def _init_caffe(cfg): """Initialize pycaffe in a training process. """ import caffe # fix the random seeds (numpy and caffe) for reproducibility np.random.seed(cfg.RNG_SEED) caffe.set_random_seed(cfg.RNG_SEED) # set up caffe caffe.set_mode_gpu() caffe.set_device(cfg.GPU_ID) def train_rpn(queue=None, imdb_name=None, init_model=None, solver=None, max_iters=None, cfg=None): """Train a Region Proposal Network in a separate training process. """ #首先進來後繼續配置了一些cfg這個物件的一些引數 # Not using any proposals, just ground-truth boxes cfg.TRAIN.HAS_RPN = True cfg.TRAIN.BBOX_REG = False # applies only to Fast R-CNN bbox regression cfg.TRAIN.PROPOSAL_METHOD = 'gt' cfg.TRAIN.IMS_PER_BATCH = 1 print 'Init model: {}'.format(init_model) #格式化輸出字串 print('Using config:') pprint.pprint(cfg) import caffe _init_caffe(cfg) #這裡是關鍵,準備資料集,我們在debug的時候可以發現,imdb是一個類,而roidb是該類的一個成員 roidb, imdb = get_roidb(imdb_name)#我們進入這個資料準備的函式看看 print 'roidb len: {}'.format(len(roidb)) output_dir = get_output_dir(imdb) print 'Output will be saved to `{:s}`'.format(output_dir) #這個solver傳入的是./models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt model_paths = train_net(solver, roidb, output_dir, pretrained_model=init_model, max_iters=max_iters) #進入train_net函式,看訓練如何實現的 # Cleanup all but the final model for i in model_paths[:-1]: #把訓練過程中儲存的中間結果的模型刪掉,只返回最終模型的結果 os.remove(i) rpn_model_path = model_paths[-1] # Send final model path through the multiprocessing queue queue.put({'model_path': rpn_model_path}) #通過佇列將該程序執行的模型結果的路徑返回 #這個函式利用rpn網路來生成proposals的 def rpn_generate(queue=None, imdb_name=None, rpn_model_path=None, cfg=None, rpn_test_prototxt=None): """Use a trained RPN to generate proposals. """ cfg.TEST.RPN_PRE_NMS_TOP_N = -1 # no pre NMS filtering cfg.TEST.RPN_POST_NMS_TOP_N = 2000 # limit top boxes after NMS print 'RPN model: {}'.format(rpn_model_path) print('Using config:') pprint.pprint(cfg) import caffe _init_caffe(cfg) # NOTE: the matlab implementation computes proposals on flipped images, too. # We compute them on the image once and then flip the already computed # proposals. This might cause a minor loss in mAP (less proposal jittering). imdb = get_imdb(imdb_name) print 'Loaded dataset `{:s}` for proposal generation'.format(imdb.name) # Load RPN and configure output directory rpn_net = caffe.Net(rpn_test_prototxt, rpn_model_path, caffe.TEST) output_dir = get_output_dir(imdb) print 'Output will be saved to `{:s}`'.format(output_dir) # Generate proposals on the imdb rpn_proposals = imdb_proposals(rpn_net, imdb) # Write proposals to disk and send the proposal file path through the # multiprocessing queue rpn_net_name = os.path.splitext(os.path.basename(rpn_model_path))[0] rpn_proposals_path = os.path.join( output_dir, rpn_net_name + '_proposals.pkl') with open(rpn_proposals_path, 'wb') as f: cPickle.dump(rpn_proposals, f, cPickle.HIGHEST_PROTOCOL) print 'Wrote RPN proposals to {}'.format(rpn_proposals_path) queue.put({'proposal_path': rpn_proposals_path}) #這個函式是用來訓練檢測網路的 def train_fast_rcnn(queue=None, imdb_name=None, init_model=None, solver=None, max_iters=None, cfg=None, rpn_file=None): """Train a Fast R-CNN using proposals generated by an RPN. """ cfg.TRAIN.HAS_RPN = False # not generating prosals on-the-fly cfg.TRAIN.PROPOSAL_METHOD = 'rpn' # use pre-computed RPN proposals instead cfg.TRAIN.IMS_PER_BATCH = 2 print 'Init model: {}'.format(init_model) print 'RPN proposals: {}'.format(rpn_file) print('Using config:') pprint.pprint(cfg) import caffe _init_caffe(cfg) roidb, imdb = get_roidb(imdb_name, rpn_file=rpn_file) output_dir = get_output_dir(imdb) print 'Output will be saved to `{:s}`'.format(output_dir) # Train Fast R-CNN model_paths = train_net(solver, roidb, output_dir, pretrained_model=init_model, max_iters=max_iters) # Cleanup all but the final model for i in model_paths[:-1]: os.remove(i) fast_rcnn_model_path = model_paths[-1] # Send Fast R-CNN model path over the multiprocessing queue queue.put({'model_path': fast_rcnn_model_path}) if __name__ == '__main__': #建議讀者除錯這個函式,進去看看每個變數是怎麼回事 args = parse_args() #解析系統傳入的argv引數,解析完放到args中返回 print('Called with args:') print(args) if args.cfg_file is not None: cfg_from_file(args.cfg_file) #如果輸入了這個引數,就呼叫該函式,應該是做某些配置操作 if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) cfg.GPU_ID = args.gpu_id # cfg是一個詞典(edict)資料結構,從faster-rcnn.config引入的 # -------------------------------------------------------------------------- # Pycaffe doesn't reliably free GPU memory when instantiated nets are # discarded (e.g. "del net" in Python code). To work around this issue, each # training stage is executed in a separate process using # multiprocessing.Process. #這裡說的要使用多程序,因為在pycaffe中當某個網路被discard後,不能可靠保證釋放記憶體資源;程序關閉後資源自然會釋放 # -------------------------------------------------------------------------- # queue for communicated results between processes mp_queue = mp.Queue() #mp指的是multiprocessing庫,所以這裡返回了一個用於多程序通訊的佇列物件 # solves, iters, etc. for each training stage solvers, max_iters, rpn_test_prototxt = get_solvers(args.net_name) #這裡返回了solvers的路徑,maxiters的值,rpn_test_prototxt的路徑 print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Stage 1 RPN, init from ImageNet model' print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' # 這一步是用imageNet的模型初始化,然後訓練rpn網路(整個訓練過程可以參考作者的論文) cfg.TRAIN.SNAPSHOT_INFIX = 'stage1' mp_kwargs = dict( queue=mp_queue, imdb_name=args.imdb_name, init_model=args.pretrained_model, solver=solvers[0], max_iters=max_iters[0], cfg=cfg) # 這裡把該階段需要的引數都放到這裡來了,即函式train_rpn的輸入引數 p = mp.Process(target=train_rpn, kwargs=mp_kwargs) # 顯然,這裡準備啟動一個新程序,呼叫函式train_rpn,傳入引數kwargs,所以我們進入train_rpn函式看看是如何工作的 p.start() rpn_stage1_out = mp_queue.get() p.join() print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Stage 1 RPN, generate proposals' print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' # 這一步是利用上一步訓練好的rpn網路,產生proposals供後面使用 mp_kwargs = dict( queue=mp_queue, imdb_name=args.imdb_name, rpn_model_path=str(rpn_stage1_out['model_path']), cfg=cfg, rpn_test_prototxt=rpn_test_prototxt) p = mp.Process(target=rpn_generate, kwargs=mp_kwargs) p.start() rpn_stage1_out['proposal_path'] = mp_queue.get()['proposal_path'] p.join() print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Stage 1 Fast R-CNN using RPN proposals, init from ImageNet model' print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' #這一步是再次用imageNet的模型初始化前5層卷積層,然後用上一步得到的proposals訓練檢測網路 cfg.TRAIN.SNAPSHOT_INFIX = 'stage1' mp_kwargs = dict( queue=mp_queue, imdb_name=args.imdb_name, init_model=args.pretrained_model, solver=solvers[1], max_iters=max_iters[1], cfg=cfg, rpn_file=rpn_stage1_out['proposal_path']) p = mp.Process(target=train_fast_rcnn, kwargs=mp_kwargs) p.start() fast_rcnn_stage1_out = mp_queue.get() p.join() print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Stage 2 RPN, init from stage 1 Fast R-CNN model' print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' #這一步固定上一步訓練好的前五層卷積層,再次訓練RPN,這樣就得到最終RPN網路的引數了 cfg.TRAIN.SNAPSHOT_INFIX = 'stage2' mp_kwargs = dict( queue=mp_queue, imdb_name=args.imdb_name, init_model=str(fast_rcnn_stage1_out['model_path']), solver=solvers[2], max_iters=max_iters[2], cfg=cfg) p = mp.Process(target=train_rpn, kwargs=mp_kwargs) p.start() rpn_stage2_out = mp_queue.get() p.join() print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Stage 2 RPN, generate proposals' print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' #利用最終確定的RPN網路產生proposals mp_kwargs = dict( queue=mp_queue, imdb_name=args.imdb_name, rpn_model_path=str(rpn_stage2_out['model_path']), cfg=cfg, rpn_test_prototxt=rpn_test_prototxt) p = mp.Process(target=rpn_generate, kwargs=mp_kwargs) p.start() rpn_stage2_out['proposal_path'] = mp_queue.get()['proposal_path'] p.join() print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Stage 2 Fast R-CNN, init from stage 2 RPN R-CNN model' print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' #利用上一步產生的proposals,訓練出最終的檢測網路 cfg.TRAIN.SNAPSHOT_INFIX = 'stage2' mp_kwargs = dict( queue=mp_queue, imdb_name=args.imdb_name, init_model=str(rpn_stage2_out['model_path']), solver=solvers[3], max_iters=max_iters[3], cfg=cfg, rpn_file=rpn_stage2_out['proposal_path']) p = mp.Process(target=train_fast_rcnn, kwargs=mp_kwargs) p.start() fast_rcnn_stage2_out = mp_queue.get() p.join() # Create final model (just a copy of the last stage) final_path = os.path.join( os.path.dirname(fast_rcnn_stage2_out['model_path']), args.net_name + '_faster_rcnn_final.caffemodel') print 'cp {} -> {}'.format( fast_rcnn_stage2_out['model_path'], final_path) shutil.copy(fast_rcnn_stage2_out['model_path'], final_path) print 'Final model: {}'.format(final_path)
分析上面訓練呼叫的函式train_net,該函式位於:./lib/fast_rcnn/train.py檔案中
# -------------------------------------------------------- # Fast R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- """Train a Fast R-CNN network.""" import caffe from fast_rcnn.config import cfg import roi_data_layer.roidb as rdl_roidb from utils.timer import Timer import numpy as np import os from caffe.proto import caffe_pb2 import google.protobuf as pb2 class SolverWrapper(object): """A simple wrapper around Caffe's solver. This wrapper gives us control over he snapshotting process, which we use to unnormalize the learned bounding-box regression weights. """ #這就是SolverWrapper的建構函式 def __init__(self, solver_prototxt, roidb, output_dir, pretrained_model=None): """Initialize the SolverWrapper.""" self.output_dir = output_dir if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and cfg.TRAIN.BBOX_NORMALIZE_TARGETS): # RPN can only use precomputed normalization because there are no # fixed statistics to compute a priori assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED if cfg.TRAIN.BBOX_REG: print 'Computing bounding-box regression targets...' self.bbox_means, self.bbox_stds = \ rdl_roidb.add_bbox_regression_targets(roidb) print 'done' # 這句話呼叫了caffe的SGDSolver,這個是caffe在C++中實現的一個類,用來進行隨機梯度下降優化,該類根據solver_prototxt中定義的網路和求解引數,完成網路 # 初始化,然後返回類SGDSolver的一個例項,關於該類的設計可以參考caffe的網站:http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SGDSolver.html # 然後作者把該物件作為SolverWrapper的一個成員,命名為solver self.solver = caffe.SGDSolver(solver_prototxt) if pretrained_model is not None: print ('Loading pretrained model ' 'weights from {:s}').format(pretrained_model) self.solver.net.copy_from(pretrained_model)#這句話完成對網路的初始化 self.solver_param = caffe_pb2.SolverParameter() with open(solver_prototxt, 'rt') as f: pb2.text_format.Merge(f.read(), self.solver_param)#這句話應該是設定了self.solver_param這個成員的引數 self.solver.net.layers[0].set_roidb(roidb)#這句話傳入訓練的資料:roidb def snapshot(self): """Take a snapshot of the network after unnormalizing the learned bounding-box regression weights. This enables easy use at test-time. """ net = self.solver.net scale_bbox_params = (cfg.TRAIN.BBOX_REG and cfg.TRAIN.BBOX_NORMALIZE_TARGETS and net.params.has_key('bbox_pred')) if scale_bbox_params: # save original values orig_0 = net.params['bbox_pred'][0].data.copy() orig_1 = net.params['bbox_pred'][1].data.copy() # scale and shift with bbox reg unnormalization; then save snapshot net.params['bbox_pred'][0].data[...] = \ (net.params['bbox_pred'][0].data * self.bbox_stds[:, np.newaxis]) net.params['bbox_pred'][1].data[...] = \ (net.params['bbox_pred'][1].data * self.bbox_stds + self.bbox_means) infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') filename = (self.solver_param.snapshot_prefix + infix + '_iter_{:d}'.format(self.solver.iter) + '.caffemodel') filename = os.path.join(self.output_dir, filename) net.save(str(filename)) print 'Wrote snapshot to: {:s}'.format(filename) if scale_bbox_params: # restore net to original state net.params['bbox_pred'][0].data[...] = orig_0 net.params['bbox_pred'][1].data[...] = orig_1 return filename def train_model(self, max_iters): """Network training loop.""" last_snapshot_iter = -1 timer = Timer() model_paths = [] while self.solver.iter < max_iters: # Make one SGD update timer.tic()#作者測量一次迭代花的時間 self.solver.step(1)# 做一次梯度下降優化 timer.toc() if self.solver.iter % (10 * self.solver_param.display) == 0: print 'speed: {:.3f}s / iter'.format(timer.average_time) if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: last_snapshot_iter = self.solver.iter model_paths.append(self.snapshot()) if last_snapshot_iter != self.solver.iter: model_paths.append(self.snapshot()) return model_paths def get_training_roidb(imdb): """Returns a roidb (Region of Interest database) for use in training.""" if cfg.TRAIN.USE_FLIPPED: print 'Appending horizontally-flipped training examples...' imdb.append_flipped_images() print 'done' print 'Preparing training data...' rdl_roidb.prepare_roidb(imdb) print 'done' return imdb.roidb def filter_roidb(roidb): """Remove roidb entries that have no usable RoIs.""" def is_valid(entry): # Valid images have: # (1) At least one foreground RoI OR # (2) At least one background RoI overlaps = entry['max_overlaps'] # find boxes with sufficient overlap fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # image is only valid if such boxes exist valid = len(fg_inds) > 0 or len(bg_inds) > 0 return valid num = len(roidb) filtered_roidb = [entry for entry in roidb if is_valid(entry)] num_after = len(filtered_roidb) print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, num, num_after) return filtered_roidb # 該函式先是呼叫了該檔案中定義的類SolverWrapper的建構函式,返回了該類的一個物件sw,然後呼叫了sw的train_model方法進行訓練 # 傳入引數,搭建caffe的網路結構,用預訓練模型完成初始化,這些過程就是在該建構函式中實現的,進入這個建構函式看看 def train_net(solver_prototxt, roidb, output_dir, pretrained_model=None, max_iters=40000): """Train a Fast R-CNN network.""" roidb = filter_roidb(roidb)#刪除一些不滿足要求的輸入圖片 sw = SolverWrapper(solver_prototxt, roidb, output_dir, pretrained_model=pretrained_model)#呼叫建構函式 print 'Solving...' model_paths = sw.train_model(max_iters)#開始訓練模型 print 'done solving' return model_paths
作者:香蕉麥樂迪--sloanqin--覃元元