caffe 實戰系列:如何寫自己的資料層(以Deep Spatial Net為例)
阿新 • • 發佈:2019-02-13
一、前言
想寫自己的層,首先必須得在caffe.proto中定義自己層的引數,以便於在proto配置檔案中對引數進行配置啦什麼的,其次你還要在caffe.proto宣告你的層的引數是可選的,然後你得在caffe的include目錄下新增你自己層的hpp標頭檔案,以及在caffe的src下的layer目錄下新增你自己的cpp實現檔案。二、具體做法
(1)首先需要在caffe.proto中宣告自己所寫的層使用引數是可選的:
比如,首先在下面紅色的位置加入 HeatmapDataParameter:
- // Layer type-specific parameters.
- //
- // Note: certain layers may have more than one computational engine
- // for their implementation. These layers include an Engine type and
- // engine parameter for selecting the implementation.
- // The default for the engine is set by the ENGINE switch at compile-time.
- optional AccuracyParameter accuracy_param = 102;
- optional ArgMaxParameter argmax_param = 103;
- optional ConcatParameter concat_param = 104;
- optional ContrastiveLossParameter contrastive_loss_param = 105;
- optional ConvolutionParameter convolution_param = 106;
- optional DataParameter data_param = 107;
- optional DropoutParameter dropout_param = 108;
- optional DummyDataParameter dummy_data_param = 109;
- optional EltwiseParameter eltwise_param = 110;
- optional EmbedParameter embed_param = 137;
- optional ExpParameter exp_param = 111;
- optional FlattenParameter flatten_param = 135;
- optional HeatmapDataParameter heatmap_data_param = 140;// 加入自己層的引數
- optional HDF5DataParameter hdf5_data_param = 112;
- optional HDF5OutputParameter hdf5_output_param = 113;
- optional HingeLossParameter hinge_loss_param = 114;
- optional ImageDataParameter image_data_param = 115;
- optional InfogainLossParameter infogain_loss_param = 116;
- optional InnerProductParameter inner_product_param = 117;
- optional LogParameter log_param = 134;
- optional LRNParameter lrn_param = 118;
- optional MemoryDataParameter memory_data_param = 119;
- optional MVNParameter mvn_param = 120;
- optional PoolingParameter pooling_param = 121;
- optional PowerParameter power_param = 122;
- optional PReLUParameter prelu_param = 131;
- optional PythonParameter python_param = 130;
- optional ReductionParameter reduction_param = 136;
- optional ReLUParameter relu_param = 123;
- optional ReshapeParameter reshape_param = 133;
- optional SigmoidParameter sigmoid_param = 124;
- optional SoftmaxParameter softmax_param = 125;
- optional SPPParameter spp_param = 132;
- optional SliceParameter slice_param = 126;
- optional TanHParameter tanh_param = 127;
- optional ThresholdParameter threshold_param = 128;
- optional TileParameter tile_param = 138;
- optional WindowDataParameter window_data_param = 129;
- }
因為我們是將引數定義在了V1LayerParameter層下面的,需要在\src\caffe\util下的upgrade_proto.cpp中加入如下幾行程式碼,方便已經訓練好的模型進行轉換。
- const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) {
- switch (type) {
- case V1LayerParameter_LayerType_NONE:
- return "";
- case V1LayerParameter_LayerType_ABSVAL:
- return "AbsVal";
- case V1LayerParameter_LayerType_ACCURACY:
- return "Accuracy";
- case V1LayerParameter_LayerType_ARGMAX:
- return "ArgMax";
- case V1LayerParameter_LayerType_BNLL:
- return "BNLL";
- case V1LayerParameter_LayerType_CONCAT:
- return "Concat";
- case V1LayerParameter_LayerType_CONTRASTIVE_LOSS:
- return "ContrastiveLoss";
- case V1LayerParameter_LayerType_CONVOLUTION:
- return "Convolution";
- case V1LayerParameter_LayerType_DECONVOLUTION:
- return "Deconvolution";
- case V1LayerParameter_LayerType_DATA:
- return "Data";
- case V1LayerParameter_LayerType_DATA_HEATMAP: // 這是我們自己新增的輸入資料的層
- return "DataHeatmap";
- case V1LayerParameter_LayerType_DROPOUT:
- return "Dropout";
- case V1LayerParameter_LayerType_DUMMY_DATA:
- return "DummyData";
- case V1LayerParameter_LayerType_EUCLIDEAN_LOSS:
- return "EuclideanLoss";
- case V1LayerParameter_LayerType_EUCLIDEAN_LOSS_HEATMAP: // 這是我們自己新增的計算損失函式的層
- return "EuclideanLossHeatmap";
- case V1LayerParameter_LayerType_ELTWISE:
- return "Eltwise";
- case V1LayerParameter_LayerType_EXP:
- return "Exp";
- case V1LayerParameter_LayerType_FLATTEN:
- return "Flatten";
- case V1LayerParameter_LayerType_HDF5_DATA:
- return "HDF5Data";
- case V1LayerParameter_LayerType_HDF5_OUTPUT:
- return "HDF5Output";
- case V1LayerParameter_LayerType_HINGE_LOSS:
- return "HingeLoss";
- case V1LayerParameter_LayerType_IM2COL:
- return "Im2col";
- case V1LayerParameter_LayerType_IMAGE_DATA:
- return "ImageData";
- case V1LayerParameter_LayerType_INFOGAIN_LOSS:
- return "InfogainLoss";
- case V1LayerParameter_LayerType_INNER_PRODUCT:
- return "InnerProduct";
- case V1LayerParameter_LayerType_LRN:
- return "LRN";
- case V1LayerParameter_LayerType_MEMORY_DATA:
- return "MemoryData";
- case V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS:
- return "MultinomialLogisticLoss";
- case V1LayerParameter_LayerType_MVN:
- return "MVN";
- case V1LayerParameter_LayerType_POOLING:
- return "Pooling";
- case V1LayerParameter_LayerType_POWER:
- return "Power";
- case V1LayerParameter_LayerType_RELU:
- return "ReLU";
- case V1LayerParameter_LayerType_SIGMOID:
- return "Sigmoid";
- case V1LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS:
- return "SigmoidCrossEntropyLoss";
- case V1LayerParameter_LayerType_SILENCE:
- return "Silence";
- case V1LayerParameter_LayerType_SOFTMAX:
- return "Softmax";
- case V1LayerParameter_LayerType_SOFTMAX_LOSS:
- return "SoftmaxWithLoss";
- case V1LayerParameter_LayerType_SPLIT:
- return "Split";
- case V1LayerParameter_LayerType_SLICE:
- return "Slice";
- case V1LayerParameter_LayerType_TANH:
- return "TanH";
- case V1LayerParameter_LayerType_WINDOW_DATA:
- return "WindowData";
- case V1LayerParameter_LayerType_THRESHOLD:
- return "Threshold";
- default:
- LOG(FATAL) << "Unknown V1LayerParameter layer type: " << type;
- return "";
- }
- }
(2)然後在caffe.proto中下面的位置加入你自己的層的引數:
- // VGG heatmap params 自己層的引數
- message HeatmapDataParameter {
- optional bool segmentation = 1000 [default = false];
- optional uint32 multfact = 1001 [default = 1];
- optional uint32 num_channels = 1002 [default = 3];
- optional uint32 batchsize = 1003;
- optional string root_img_dir = 1004;
- optional bool random_crop = 1005; // image augmentation type
- optional bool sample_per_cluster = 1006; // image sampling type
- optional string labelinds = 1007 [default = '']; // if specified, only use these regression variables
- optional string source = 1008;
- optional string meanfile = 1009;
- optional string crop_meanfile = 1010;
- optional uint32 cropsize = 1011 [default = 0];
- optional uint32 outsize = 1012 [default = 0];
- optional float scale = 1013 [ default = 1 ];
- optional uint32 label_width = 1014 [ default = 1 ];
- optional uint32 label_height = 1015 [ default = 1 ];
- optional bool dont_flip_first = 1016 [ default = true ];
- optional float angle_max = 1017 [ default = 0 ];
- optional bool flip_joint_labels = 1018 [ default = true ];
- }
- // NOTE
- // Update the next available ID when you add a new LayerParameter field.
- //
- // LayerParameter next available layer-specific ID: 139 (last added: tile_param)
- message LayerParameter {
- optional string name = 1; // the layer name
- optional string type = 2; // the layer type
- repeated string bottom = 3; // the name of each bottom blob
- repeated string top = 4; // the name of each top blob
- // The train / test phase for computation.
- optional Phase phase = 10;
- // The amount of weight to assign each top blob in the objective.
- // Each layer assigns a default value, usually of either 0 or 1,
- // to each top blob.
- repeated float loss_weight = 5;
- // Specifies training parameters (multipliers on global learning constants,
- // and the name and other settings used for weight sharing).
- repeated ParamSpec param = 6;
- // The blobs containing the numeric parameters of the layer.
- repeated BlobProto blobs = 7;
- // Specifies on which bottoms the backpropagation should be skipped.
- // The size must be either 0 or equal to the number of bottoms.
- repeated bool propagate_down = 11;
- // Rules controlling whether and when a layer is included in the network,
- // based on the current NetState. You may specify a non-zero number of rules
- // to include OR exclude, but not both. If no include or exclude rules are
- // specified, the layer is always included. If the current NetState meets
- // ANY (i.e., one or more) of the specified rules, the layer is
- // included/excluded.
- repeated NetStateRule include = 8;
- repeated NetStateRule exclude = 9;
- // Parameters for data pre-processing.
- optional TransformationParameter transform_param = 100;
- // Parameters shared by loss layers.
- optional LossParameter loss_param = 101;
- // Options to allow visualisation視覺化層的引數,就這兩貨哈
- optional bool visualise = 200 [ default = false ];
- optional uint32 visualise_channel = 201 [ default = 0 ];
下面對各個引數進行解釋: segmentation 是否分割,預設是否, 假設影象的分割模板在segs/目錄 multfact 將ground truth中的關節乘以這個multfact,就是影象中的位置,影象中的位置除以這個就是關節的位置,預設是1,也就是說關節的座標與影象的座標是一致大小的 num_channels 影象的channel數預設是3 batchsize batch大小 root_img_dir 存放影象檔案的根目錄 random_crop 是否需要隨機crop影象(如果true則做隨機crop,否則做中心crop) sample_per_cluster 影象取樣的型別(是否均勻地在clusters上取樣) labelinds 類標索引(只使用迴歸變數才設定這個) source 存放打亂檔案順序之後的檔案路徑的txt檔案 meanfile 平均值檔案路徑 crop_meanfile crop之後的平均值檔案路徑 cropsize crop的大小 outsize 預設是0(就是crop出來之後的影象會縮放的因子,0表示不縮放) scale 預設是1,實際上就是一系列預處理(去均值、crop、縮放之後的畫素值乘以該scale得到最終的影象的) label_width heatmap的寬 label_height heatmap的高 dont_flip_first 不要對調第一個關節的位置,預設是true angle_max 對影象進行旋轉的最大角度,用於增強資料的,預設是0度 flip_joint_labels 預設是true(即水平翻轉,將左右的關節對調) 為了保證完整性,把英文解釋全部: - visualise: show visualisations for crops, rotations etc (recommended for testing) - source: label file - root_img_dir: directory with images (recommend you store images on ramdisk) - meanfile: proto file containing the mean image(s) to be subtracted (optional) - cropsize: size of random crop (randomly cropped from the original image) - outsize: size that crops are resized to - multfact: label coordinates in the ground truth text file are multiplied by this (default 1) - sample_per_cluster: sample evenly across clusters - random_crop: do random crop (if false, do center crop) - label_height/width: width of regressed heatmap (must match net config) - segmentation: segment images on the fly (assumes images are in a segs/ directory) - angle_max: max rotation angle for training augmentation - flip_joint_labels: when horizontally flipping images for augmentation, if this is set to true the code also swaps left<->right labels (this is important e.g. for observer-centric pose estimation). This assumes that the left,right joint labelsare listed consecutively (e.g. wrist_left,wrist_right,elbow_left,elbow_right) - dont_flip_first: This option allows you to turn off label mirroring for the first label. E.g. 
for labels head,wrist_right,wrist_left,elbow_right,elbow_left,shoulder_right,shoulder_left, the first joint is head and should not be swapped with wrist_right.
(3)這樣,你就可以在proto中配置你自己層的引數了
下面給出一個配置 heatmapdata 層的例項:
- layer {
- name: "data"
- type: "DataHeatmap" // 層的型別是DataHeatmap
- top: "data"
- top: "label"
- visualise: false // 是否視覺化
- include: { phase: TRAIN }
- heatmap_data_param {
- source: "/data/tp/flic/train_shuffle.txt"
- root_img_dir: "/mnt/ramdisk/tp/flic/"
- batchsize: 14
- cropsize: 248
- outsize: 256
- sample_per_cluster: false
- random_crop: true
- label_width: 64
- label_height: 64
- segmentation: false
- flip_joint_labels: true
- dont_flip_first: true
- angle_max: 40
- multfact: 1 # set to 282 if using preprocessed data from website
- }
- }
(4)heatmapdata層的實現
1)在介紹實現之前需要給出我們的訓練資料的樣子 看完引數,我們看一下訓練的資料的格式感性理解一下: 下面給出一個樣例: train/FILE.jpg 123,144,165,123,66,22 372.296,720,1,480,0.53333 0 下面對樣例做出解釋 引數之間是以空格分隔 第一個引數是影象的路徑:train/FILE.jpg 第二個引數是關節座標:123,144,165,123,66,22 第三個引數是crop和scale的引數,分別為x_left,x_right,y_left,y_right,scaling_fact:372.296,720,1,480,0.53333 注意:第三個引數的crop的座標其實上針對的是mean影象的,在mean影象中進行crop,然後放大到與原始影象一樣大小,然後原始影象減去經過crop且放大之後的mean影象。這樣在對原始影象進行crop的時候就不用擔心了 第四個引數是是否cluster,是否均勻地在訓練中取樣影象: 0 This is a space-delimited file where the first arg is the path to your image the second arg is a comma-delimited list of (x,y) coordinates you wish to regress (the coordinates in the train/FILE.jpg image space) the third arg is a comma-delimited list of crops & scaling factors of the input image (in order x_left,x_right,y_left,y_right,scaling_fact). Note: These crop & scaling factors are only used to crop the mean image. You can set these to 0 if you aren't using a mean image (for mean subtraction). the fourth arg is a coordinate 'cluster' (from which you have the option to evenly sample images in training). You can set this to 0. 2)在講解該層如何實現之前首先介紹點預備知識: ①首先給出在opencv中如何crop一幅影象- // You mention that you start with a CVMat* imagesource
- CVMat * imagesource;
- // Transform it into the C++ cv::Mat format
- cv::Mat image(imagesource);
- // Setup a rectangle to define your region of interest
- cv::Rect myROI(10, 10, 100, 100);
- // Crop the full image to that image contained by the rectangle myROI
- // Note that this doesn't copy the data
- cv::Mat croppedImage = image(myROI);
②如何進行隨機crop以及中心crop
上圖中的黃色邊框表示影象 藍色邊框表示x_border = x-cropsize以及y_border=y-cropsize大小的crop區域 如果隨機crop則表示從[0,x_border-1]以及[0,y_border-1]大小的區域(也就是圖中的藍色矩形框內)隨機採集一個點座標crop的左上角的點,然後以cropsize為邊長取一個正方型。 如果是中心crop則取圖中兩個虛線的交點,即藍色矩形的中心座標crop的左上角的點,然後以cropsize為邊長取一個正方形。 3)我們所寫的層應該繼承那個基類 我們所寫的HeatmapData層是繼承自BasePrefetchingDataLayer的(在檔案data_layers.hpp中),下面給出其定義
- template <typename Dtype>
- class BasePrefetchingDataLayer :
- public BaseDataLayer<Dtype>, public InternalThread {
- public:
- explicit BasePrefetchingDataLayer(const LayerParameter& param);
- // LayerSetUp: implements common data layer setup functionality, and calls
- // DataLayerSetUp to do special data layer setup for individual layer types.
- // This method may not be overridden.
- void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- // Prefetches batches (asynchronously if to GPU memory)
- static const int PREFETCH_COUNT = 3;
- protected:
- virtual void InternalThreadEntry();
- virtual void load_batch(Batch<Dtype>* batch) = 0;
- Batch<Dtype> prefetch_[PREFETCH_COUNT];
- BlockingQueue<Batch<Dtype>*> prefetch_free_;
- BlockingQueue<Batch<Dtype>*> prefetch_full_;
- Blob<Dtype> transformed_data_;
- };
4)實現自己的層 首先定義層的標頭檔案
- // Copyright 2014 Tomas Pfister
- #ifndef CAFFE_HEATMAP_HPP_
- #define CAFFE_HEATMAP_HPP_
- #include "caffe/layer.hpp"
- #include <vector>
- #include <boost/timer/timer.hpp>
- #include <opencv2/core/core.hpp>
- #include "caffe/common.hpp"
- #include "caffe/data_transformer.hpp"
- #include "caffe/filler.hpp"
- #include "caffe/internal_thread.hpp"
- #include "caffe/proto/caffe.pb.h"
- namespace caffe
- {
- // 繼承自PrefetchingDataLayer
- template<typename Dtype>
- class DataHeatmapLayer: public BasePrefetchingDataLayer<Dtype>
- {
- public:
- explicit DataHeatmapLayer(const LayerParameter& param)
- : BasePrefetchingDataLayer<Dtype>(param) {}
- virtual ~DataHeatmapLayer();
- virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual inline const char* type() const { return "DataHeatmap"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int ExactNumTopBlobs() const { return 2; }
- protected:
- // 虛擬函式,就是實際讀取一批資料到Batch中
- virtual void load_batch(Batch<Dtype>* batch);
- // 以下都是自己定義的要使用的函式,都在load_batch中被呼叫了
- // Filename of current image
- inline void GetCurImg(string& img_name, std::vector<float>& img_class, std::vector<float>& crop_info, int& cur_class);
- inline void AdvanceCurImg();
- // Visualise point annotations
- inline void VisualiseAnnotations(cv::Mat img_annotation_vis, int numChannels, std::vector<float>& cur_label, int width);
- // Random number generator
- inline float Uniform(const float min, const float max);
- // Rotate image for augmentation
- inline cv::Mat RotateImage(cv::Mat src, float rotation_angle);
- // Global vars
- shared_ptr<Caffe::RNG> rng_data_;
- shared_ptr<Caffe::RNG> prefetch_rng_;
- vector<std::pair<std::string, int> > lines_;
- int lines_id_;
- int datum_channels_;
- int datum_height_;
- int datum_width_;
- int datum_size_;
- int num_means_;
- int cur_class_;
- vector<int> labelinds_;
- vector<cv::Mat> mean_img_;
- bool sub_mean_; // true if the mean should be subtracted
- bool sample_per_cluster_; // sample separately per cluster?
- string root_img_dir_;
- vector<float> cur_class_img_; // current class index
- int cur_img_; // current image index
- vector<int> img_idx_map_; // current image indices for each class
- // array of lists: one list of image names per class
- vector< vector< pair<string, pair<vector<float>, pair<vector<float>, int> > > > > img_list_;
- // vector of (image, label) pairs
- vector< pair<string, pair<vector<float>, pair<vector<float>, int> > > > img_label_list_;
- };
- }
- #endif /* CAFFE_HEATMAP_HPP_ */
在介紹詳細實現之前先口述一下實現的流程: 1)首先在SetUp該函式中讀取,proto中的引數,從而獲得一批資料的大小、heatmap的長和寬,對影象進行切割的大小,以及切割後的影象需要縮放到多大,還有就是是否需要對每個類別的影象進行取樣、放置影象的根目錄等資訊。 此外還讀取每個影象檔案的路徑、關節的座標位置、crop的位置、是否進行取樣。 如果在每個類上進行取樣,還會生成一個數組,該陣列對應的是影象的類別索引與影象的索引之間的對映。 此外還從檔案中讀取每個視訊的mean,然後將所讀取的mean放到vector容器中,便於在讀取資料的時候從影象中取出mean。最後還會設定top的形狀 2)在load_batch這個函式中就是真正地讀取資料,並且對資料進行預處理,預處理主要是是否對影象進行分割,對平均值影象進行切割,並將切割的影象塊放大到影象的大小,然後用影象減去該段視訊切割並方法的平均值影象(你會不會覺得很奇怪,我也覺得很奇怪。。。竟然是切割平均值影象的,然後放大到與原影象一樣的大小,然後再用原影象減去該均值影象,主要是原理我沒想明白)。
- // Copyright 2015 Tomas Pfister
- #include <fstream> // NOLINT(readability/streams)
- #include <iostream> // NOLINT(readability/streams)
- #include <string>
- #include <utility>
- #include <vector>
- #include "caffe/data_layers.hpp"
- #include "caffe/layer.hpp"
- #include "caffe/util/io.hpp"
- #include "caffe/util/math_functions.hpp"
- #include "caffe/util/rng.hpp"
- #include <stdint.h>
- #include <cmath>
- #include <opencv2/core/core.hpp>
- #include <opencv2/highgui/highgui.hpp>
- #include <opencv2/highgui/highgui_c.h>
- #include <opencv2/imgproc/imgproc.hpp>
- #include "caffe/layers/data_heatmap.hpp"
- #include "caffe/util/benchmark.hpp"
- #include <unistd.h>
- namespace caffe
- {
- template <typename Dtype>
- DataHeatmapLayer<Dtype>::~DataHeatmapLayer<Dtype>() {
- this->StopInternalThread();
- }
- // 讀取引數檔案中的一些資料什麼的,然後初始化
- template<typename Dtype>
- void DataHeatmapLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- HeatmapDataParameter heatmap_data_param = this->layer_param_.heatmap_data_param();
- // Shortcuts
- // 類標索引字串(也就是關節型別?)
- const std::string labelindsStr = heatmap_data_param.labelinds();
- // batchsize
- const int batchsize = heatmap_data_param.batchsize();
- // heatmap的寬度
- const int label_width = heatmap_data_param.label_width();
- // heatmap的高度
- const int label_height = heatmap_data_param.label_height();
- // crop的大小
- const int size = heatmap_data_param.cropsize();
- // crop之後再次進行resize之後的大小
- const int outsize = heatmap_data_param.outsize();
- // label的batchsize
- const int label_batchsize = batchsize;
- // 每個cluster都要進行取樣
- sample_per_cluster_ = heatmap_data_param.sample_per_cluster();
- // 存放影象檔案的根路徑
- root_img_dir_ = heatmap_data_param.root_img_dir();
- // initialise rng seed
- const unsigned int rng_seed = caffe_rng_rand();
- srand(rng_seed);
- // get label inds to be used for training
- // 載入類標索引
- std::istringstream labelss(labelindsStr);
- LOG(INFO) << "using joint inds:";
- while (labelss)
- {
- std::string s;
- if (!std::getline(labelss, s, ',')) break;
- labelinds_.push_back(atof(s.c_str()));
- LOG(INFO) << atof(s.c_str());
- }
- // load GT
- // shuffle file
- // 載入ground truth檔案,即關節座標檔案
- std::string gt_path = heatmap_data_param.source();
- LOG(INFO) << "Loading annotation from " << gt_path;
- std::ifstream infile(gt_path.c_str());
- string img_name, labels, cropInfos, clusterClassStr;
- if (!sample_per_cluster_) // 是否根據你指定的類別隨機取影象
- {
- // sequential sampling
- // 檔名,關節位置座標,crop的位置,是否均勻地在clusters上取樣
- while (infile >> img_name >> labels >> cropInfos >> clusterClassStr)
- {
- // read comma-separated list of regression labels
- // 讀取關節位置座標
- std::vector <float> label;
- std::istringstream ss(labels);
- int labelCounter = 1;
- while (ss)
- {
- // 讀取一個數字
- std::string s;
- if (!std::getline(ss, s, ',')) break;
- // 是否是類標索引中的值
- // 如果labelinds為空或者為不為空在其中找到
- if (labelinds_.empty() || std::find(labelinds_.begin(), labelinds_.end(), labelCounter) != labelinds_.end())
- {
- label.push_back(atof(s.c_str()));
- }
- labelCounter++;// 個數
- }
- // read cropping info
- // 讀取crop的資訊
- std::vector <float> cropInfo;
- std::istringstream ss2(cropInfos);
- while (ss2)
- {
- std::string s;
- if (!std::getline(ss2, s, ',')) break;
- cropInfo.push_back(atof(s.c_str()));
- }
- int clusterClass = atoi(clusterClassStr.c_str());
- // 影象路徑,關節座標,crop資訊、類別
- img_label_list_.push_back(std::make_pair(img_name, std::make_pair(label, std::make_pair(cropInfo, clusterClass))));
- }
- // initialise image counter to 0
- cur_img_ = 0;
- }
- else
- {
- // uniform sampling w.r.t. classes
- // 根據類別均勻取樣
- // 也就是說影象有若干個類別,然後每個類別下有若干個影象
- // 隨機取其中一個影象
- while (infile >> img_name >> labels >> cropInfos >> clusterClassStr)
- {
- // 獲得你指定的類別
- // 如果你制定為0
- int clusterClass = atoi(clusterClassStr.c_str());
- // 那麼
- if (clusterClass + 1 > img_list_.size())
- {
- // expand the array
- img_list_.resize(clusterClass + 1);
- }
- // read comma-separated list of regression labels
- // 讀取關節的座標位置到label這個vector
- std::vector <float> label;
- std::istringstream ss(labels);
- int labelCounter = 1;
- while (ss)
- {
- std::string s;
- if (!std::getline(ss, s, ',')) break;
- if (labelinds_.empty() || std::find(labelinds_.begin(), labelinds_.end(), labelCounter) != labelinds_.end())
- {
- label.push_back(atof(s.c_str()));
- }
- labelCounter++;
- }
- // read cropping info
- // 讀取crop資訊到cropinfo這個vector
- std::vector <float> cropInfo;
- std::istringstream ss2(cropInfos);
- while (ss2)
- {
- std::string s;
- if (!std::getline(ss2, s, ',')) break;
- cropInfo.push_back(atof(s.c_str()));
- }
- // 每個clusterClass下都是一個vector,用於裝各種影象
- img_list_[clusterClass].push_back(std::make_pair(img_name, std::make_pair(label, std::make_pair(cropInfo, clusterClass))));
- }// while結尾
- // 影象的類別個數
- const int num_classes = img_list_.size();
- // init image sampling
- cur_class_ = 0;
- // cur_class_img_中存放的是某個類別中隨機取到的影象的索引值
- cur_class_img_.resize(num_classes);
- // init image indices for each c