mxnet深度學習實戰:跑自己的資料實驗和一些問題總結
阿新 • 發佈:2018-12-22
用mxnet跑自己的資料
0 程式碼編譯
git clone https://github.com/dmlc/mxnet.git
git clone https://github.com/dmlc/mshadow.git
git clone https://github.com/dmlc/dmlc-core.git
git clone https://github.com/dmlc/ps-lite.git
make -j4
1 資料準備
參考 http://blog.csdn.net/a350203223/article/details/50263737 把資料轉換成 REC 模式。
備註: make_list.py 可以自動生成 train 和 val 的 lst檔案。 可使用引數 --train_ratio=XXX
2 跑資料
參考mxnet/example/image-classification裡面train_cifar10.py 和 symbol_inception-bn-28-small.py
symbol檔案主要用來儲存網路結構
一個簡單的3層CNN網路
symbol_UCM.py
import find_mxnet
import mxnet as mx


def get_symbol(num_classes=21):
    """Build a small 3-layer CNN symbol (conv-bn-relu-pool x3 + 2 FC).

    Args:
        num_classes: number of output classes; defaults to 21 (UCM scene
            classification dataset).

    Returns:
        The ``mx.symbol`` of the softmax output node of the network.
    """
    data = mx.symbol.Variable('data')

    # first conv block
    # NOTE(review): the original listing was truncated at "kernel=(3"; the
    # kernel is restored as 3x3 and num_filter as a plausible value --
    # confirm against the author's original symbol file.
    conv1 = mx.symbol.Convolution(data=data, kernel=(3, 3), num_filter=96)
    bn1 = mx.symbol.BatchNorm(data=conv1)
    relu1 = mx.symbol.Activation(data=bn1, act_type="relu")
    pool1 = mx.symbol.Pooling(data=relu1, pool_type="max",
                              kernel=(5, 5), stride=(3, 3))

    # second conv block
    # NOTE(review): same truncation as above; 3x3 kernel restored,
    # num_filter guessed -- TODO confirm.
    conv2 = mx.symbol.Convolution(data=pool1, kernel=(3, 3), num_filter=128)
    bn2 = mx.symbol.BatchNorm(data=conv2)
    relu2 = mx.symbol.Activation(data=bn2, act_type="relu")
    pool2 = mx.symbol.Pooling(data=relu2, pool_type="max",
                              kernel=(3, 3), stride=(2, 2))

    # third conv block (the original comment said "second" twice)
    conv3 = mx.symbol.Convolution(data=pool2, kernel=(3, 3), num_filter=196)
    bn3 = mx.symbol.BatchNorm(data=conv3)
    relu3 = mx.symbol.Activation(data=bn3, act_type="relu")
    pool3 = mx.symbol.Pooling(data=relu3, pool_type="max",
                              kernel=(2, 2), stride=(2, 2), name="final_pool")

    # first fully-connected layer
    flatten = mx.symbol.Flatten(data=pool3)
    fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=420)
    relu4 = mx.symbol.Activation(data=fc1, act_type="relu")

    # second fully-connected layer -> class scores
    fc2 = mx.symbol.FullyConnected(data=relu4, num_hidden=num_classes)

    # softmax loss / output
    softmax = mx.symbol.SoftmaxOutput(data=fc2, name='softmax')
    return softmax
train_UCM.py
import find_mxnet
import mxnet as mx
import argparse
import os, sys
import train_model

parser = argparse.ArgumentParser(description='train an image classifer on UCMnet')
parser.add_argument('--network', type=str, default='UCM_128_BN3layer',
                    help='the cnn to use')
parser.add_argument('--data-dir', type=str,
                    default='/home/panda/Ureserch/data/Scene/UCM/',
                    help='the input data directory')
parser.add_argument('--gpus', type=str, default='0',
                    help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--num-examples', type=int, default=1680,
                    help='the number of training examples')
parser.add_argument('--batch-size', type=int, default=64,
                    help='the batch size')
parser.add_argument('--lr', type=float, default=.01,
                    help='the initial learning rate')
parser.add_argument('--lr-factor', type=float, default=.94,
                    help='times the lr with a factor for every lr-factor-epoch epoch')
parser.add_argument('--lr-factor-epoch', type=float, default=5,
                    help='the number of epoch to factor the lr, could be .5')
parser.add_argument('--model-prefix', type=str,
                    help='the prefix of the model to load/save')
parser.add_argument('--num-epochs', type=int, default=80,
                    help='the number of training epochs')
parser.add_argument('--load-epoch', type=int,
                    help="load the model on an epoch using the model-prefix")
parser.add_argument('--kv-store', type=str, default='local',
                    help='the kvstore type')
# Log destination; the log is later parsed to plot the training curve.
parser.add_argument('--log-file', type=str, default="xxx",
                    help='the name of log file')
parser.add_argument('--log-dir', type=str, default="/xxx/xxx/xxx/",
                    help='directory of the log file')
args = parser.parse_args()

# network: import symbol_<name>.py dynamically and build the symbol.
import importlib
net = importlib.import_module('symbol_' + args.network).get_symbol(21)


# data: if the image-mean file is missing, MXNet computes it automatically
# and stores it at args.data_dir + "xxx.bin".
def get_iterator(args, kv):
    """Return (train, val) ImageRecordIter pair sharded by kvstore worker."""
    data_shape = (3, 109, 109)
    train = mx.io.ImageRecordIter(
        path_imgrec=args.data_dir + "xxx.rec",
        mean_img=args.data_dir + "xxx.bin",
        data_shape=data_shape,
        batch_size=args.batch_size,
        rand_crop=True,
        rand_mirror=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    val = mx.io.ImageRecordIter(
        # BUG FIX: the original read "xxxrec" (missing dot), which does not
        # match the record file naming used for the train iterator.
        path_imgrec=args.data_dir + "xxx.rec",
        mean_img=args.data_dir + "xxx.bin",
        rand_crop=False,
        rand_mirror=False,
        data_shape=data_shape,
        batch_size=args.batch_size,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    return (train, val)


# train
train_model.fit(args, net, get_iterator)
3. 利用 log 畫 training和val曲線
需用到matplotlib,提前安裝
import matplotlib.pyplot as plt
import numpy as np
import re
import argparse

parser = argparse.ArgumentParser(
    description='Parses log file and generates train/val curves')
parser.add_argument('--log-file', type=str,
                    default="/home/panda/Ureserch/mxnet_panda/UCM_EXP/UCM_128_log_4",
                    help='the path of log file')
args = parser.parse_args()

# Raw strings so \s and \d reach the regex engine as escapes instead of
# being (mis)interpreted as Python string escapes.
TR_RE = re.compile(r'.*?]\sTrain-accuracy=([\d\.]+)')
VA_RE = re.compile(r'.*?]\sValidation-accuracy=([\d\.]+)')

# Read the whole log; `with` closes the file deterministically (the
# original leaked the file handle).
with open(args.log_file) as f:
    log = f.read()

log_tr = [float(x) for x in TR_RE.findall(log)]
log_va = [float(x) for x in VA_RE.findall(log)]
idx = np.arange(len(log_tr))

plt.figure(figsize=(8, 6))
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.plot(idx, log_tr, 'o', linestyle='-', color="r",
         label="Train accuracy")
plt.plot(idx, log_va, 'o', linestyle='-', color="b",
         label="Validation accuracy")
plt.legend(loc="best")
plt.xticks(np.arange(min(idx), max(idx) + 1, 5))
plt.yticks(np.arange(0, 1, 0.2))
plt.ylim([0, 1])
plt.show()
4. 儲存訓練好的模型
在 train_model.py 加入如下程式碼,訓練完成後儲存
# Persist the trained model once training finishes (appended to
# train_model.py); the checkpoint epoch is the final training epoch.
checkpoint_prefix = 'UCM_MODEL'
model.save(checkpoint_prefix, args.num_epochs)
5. 利用儲存的模型進行predict
predict_UCM.py
import find_mxnet
import mxnet as mx
import logging
import argparse
import os, sys
import train_model
import numpy as np

# Load mxnet's ImageNet-trained Inception checkpoint; any other saved
# model works the same way.
checkpoint_prefix = '/home/panda/Ureserch/mxnet_panda/inception-21k model/Inception'
checkpoint_epoch = 9
model_load = mx.model.FeedForward.load(checkpoint_prefix, checkpoint_epoch)

data_shape = (3, 224, 224)

# Build the evaluation iterator with batch_size = 1.
val = mx.io.ImageRecordIter(
    path_imgrec='/xxx/xxx/' + "xxx.rec",
    mean_img='/xxx/xxx/' + "xxx.bin",
    rand_crop=False,
    rand_mirror=False,
    data_shape=data_shape,
    batch_size=1)

# return_data=True also returns the input data and labels alongside the
# predicted probabilities.
[prob, data1, label1] = model_load.predict(val, return_data=True)
6 利用 pretrain模型提取任意層特徵
feature_extraction.py
模型和資料準備同 step 5。
# Expose every internal layer output of the loaded model so one can be
# selected as the feature layer.
internals = model_load.symbol.get_internals()
# Remember the name of the layer you want features from; here "flatten".
fea_symbol = internals["flatten_output"]
feature_extractor = mx.model.FeedForward(
    ctx=mx.gpu(), symbol=fea_symbol, numpy_batch_size=1,
    arg_params=model_load.arg_params, aux_params=model_load.aux_params,
    allow_extra_params=True)
[val_feature, valdata, vallabel] = feature_extractor.predict(val, return_data=True)

# Save the features in MATLAB format via scipy (the original listing had
# this note as bare prose inside the code, which would be a SyntaxError).
import scipy.io as sio
sio.savemat('/xxx/xxx.mat', {'val_feature': val_feature})
7 利用 pretrain 模型來初始化你的網路引數。
再續