caffe 實戰系列：如何寫自己的資料層（以Deep Spatial Net為例）

阿新 • • 發佈：2019-02-13

一、前言

想寫自己的層，首先必須得在caffe.proto中定義自己層的引數，以便於在proto配置檔案中對引數進行配置啦什麼的，其次你還要在caffe.proto宣告你的層的引數是可選的，然後你得在caffe的include目錄下新增你自己層的hpp標頭檔案，以及在caffe的src下的layer目錄下新增你自己的cpp實現檔案。

二、具體做法

（1）首先需要在caffe.proto中宣告自己所寫的層使用引數是可選的：

比如，首先在下面紅色的位置加入HeatmapDataParameter

// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
// The default for the engine is set by the ENGINE switch at compile-time.
optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional DataParameter data_param = 107;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135;
optional HeatmapDataParameter heatmap_data_param = 140;// 加入自己層的引數
optional HDF5DataParameter hdf5_data_param = 112;
optional HDF5OutputParameter hdf5_output_param = 113;
optional HingeLossParameter hinge_loss_param = 114;
optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional WindowDataParameter window_data_param = 129;
}

因為我們是將引數定義在了V1LayerParameter層下面的，需要在\src\caffe\util下的upgrade_proto.cpp中加入如下幾行程式碼，方便已經訓練好的模型進行轉換。

constchar* UpgradeV1LayerType(const V1LayerParameter_LayerType type) {
switch (type) {
case V1LayerParameter_LayerType_NONE:
return"";
case V1LayerParameter_LayerType_ABSVAL:
return"AbsVal";
case V1LayerParameter_LayerType_ACCURACY:
return"Accuracy";
case V1LayerParameter_LayerType_ARGMAX:
return"ArgMax";
case V1LayerParameter_LayerType_BNLL:
return"BNLL";
case V1LayerParameter_LayerType_CONCAT:
return"Concat";
case V1LayerParameter_LayerType_CONTRASTIVE_LOSS:
return"ContrastiveLoss";
case V1LayerParameter_LayerType_CONVOLUTION:
return"Convolution";
case V1LayerParameter_LayerType_DECONVOLUTION:
return"Deconvolution";
case V1LayerParameter_LayerType_DATA:
return"Data";
case V1LayerParameter_LayerType_DATA_HEATMAP:// 這是我們自己新增的輸入資料的層
return"DataHeatmap";
case V1LayerParameter_LayerType_DROPOUT:
return"Dropout";
case V1LayerParameter_LayerType_DUMMY_DATA:
return"DummyData";
case V1LayerParameter_LayerType_EUCLIDEAN_LOSS:
return"EuclideanLoss";
case V1LayerParameter_LayerType_EUCLIDEAN_LOSS_HEATMAP:// 這是我們自己新增的計算損失函式的層
return"EuclideanLossHeatmap";
case V1LayerParameter_LayerType_ELTWISE:
return"Eltwise";
case V1LayerParameter_LayerType_EXP:
return"Exp";
case V1LayerParameter_LayerType_FLATTEN:
return"Flatten";
case V1LayerParameter_LayerType_HDF5_DATA:
return"HDF5Data";
case V1LayerParameter_LayerType_HDF5_OUTPUT:
return"HDF5Output";
case V1LayerParameter_LayerType_HINGE_LOSS:
return"HingeLoss";
case V1LayerParameter_LayerType_IM2COL:
return"Im2col";
case V1LayerParameter_LayerType_IMAGE_DATA:
return"ImageData";
case V1LayerParameter_LayerType_INFOGAIN_LOSS:
return"InfogainLoss";
case V1LayerParameter_LayerType_INNER_PRODUCT:
return"InnerProduct";
case V1LayerParameter_LayerType_LRN:
return"LRN";
case V1LayerParameter_LayerType_MEMORY_DATA:
return"MemoryData";
case V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS:
return"MultinomialLogisticLoss";
case V1LayerParameter_LayerType_MVN:
return"MVN";
case V1LayerParameter_LayerType_POOLING:
return"Pooling";
case V1LayerParameter_LayerType_POWER:
return"Power";
case V1LayerParameter_LayerType_RELU:
return"ReLU";
case V1LayerParameter_LayerType_SIGMOID:
return"Sigmoid";
case V1LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS:
return"SigmoidCrossEntropyLoss";
case V1LayerParameter_LayerType_SILENCE:
return"Silence";
case V1LayerParameter_LayerType_SOFTMAX:
return"Softmax";
case V1LayerParameter_LayerType_SOFTMAX_LOSS:
return"SoftmaxWithLoss";
case V1LayerParameter_LayerType_SPLIT:
return"Split";
case V1LayerParameter_LayerType_SLICE:
return"Slice";
case V1LayerParameter_LayerType_TANH:
return"TanH";
case V1LayerParameter_LayerType_WINDOW_DATA:
return"WindowData";
case V1LayerParameter_LayerType_THRESHOLD:
return"Threshold";
default:
LOG(FATAL) << "Unknown V1LayerParameter layer type: " << type;
return"";
}
}

（2）然後在caffe.proto中下面的位置加入你自己的層的引數：

// VGG heatmap params: parameters of the custom heatmap data layer.
// Field descriptions below follow the parameter notes given in this article.
message HeatmapDataParameter {
// Segment images on the fly (assumes images are in a segs/ directory).
optional bool segmentation = 1000 [default = false];
// Label coordinates in the ground truth text file are multiplied by this.
optional uint32 multfact = 1001 [default = 1];
// Number of image channels.
optional uint32 num_channels = 1002 [default = 3];
// Batch size.
optional uint32 batchsize = 1003;
// Directory with images.
optional string root_img_dir = 1004;
// Do random crop (if false, do center crop).
optional bool random_crop = 1005; // image augmentation type
// Sample evenly across clusters.
optional bool sample_per_cluster = 1006; // image sampling type
optional string labelinds = 1007 [default = '']; // if specified, only use these regression variables
// Label file.
optional string source = 1008;
// Proto file containing the mean image(s) to be subtracted (optional).
optional string meanfile = 1009;
// Path of the mean file after cropping, per the article -- TODO confirm usage.
optional string crop_meanfile = 1010;
// Size of the crop taken from the original image.
optional uint32 cropsize = 1011 [default = 0];
// Size that crops are resized to.
optional uint32 outsize = 1012 [default = 0];
// Per the article, pixel values are multiplied by this after preprocessing
// -- TODO confirm against the layer implementation.
optional float scale = 1013 [ default = 1 ];
// Width of the regressed heatmap (must match net config).
optional uint32 label_width = 1014 [ default = 1 ];
// Height of the regressed heatmap (must match net config).
optional uint32 label_height = 1015 [ default = 1 ];
// Turns off label mirroring for the first label.
optional bool dont_flip_first = 1016 [ default = true ];
// Max rotation angle for training augmentation.
optional float angle_max = 1017 [ default = 0 ];
// When horizontally flipping images, also swap left<->right labels.
optional bool flip_joint_labels = 1018 [ default = true ];
}

還有視覺化的測試引數

// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 139 (last added: tile_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
// The train / test phase for computation.
optional Phase phase = 10;
// The amount of weight to assign each top blob in the objective.
// Each layer assigns a default value, usually of either 0 or 1,
// to each top blob.
repeated float loss_weight = 5;
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
repeated ParamSpec param = 6;
// The blobs containing the numeric parameters of the layer.
repeated BlobProto blobs = 7;
// Specifies on which bottoms the backpropagation should be skipped.
// The size must be either 0 or equal to the number of bottoms.
repeated bool propagate_down = 11;
// Rules controlling whether and when a layer is included in the network,
// based on the current NetState. You may specify a non-zero number of rules
// to include OR exclude, but not both. If no include or exclude rules are
// specified, the layer is always included. If the current NetState meets
// ANY (i.e., one or more) of the specified rules, the layer is
// included/excluded.
repeated NetStateRule include = 8;
repeated NetStateRule exclude = 9;
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 100;
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;
// Options to allow visualisation視覺化層的引數，就這兩貨哈
optional bool visualise = 200 [ default = false ];
optional uint32 visualise_channel = 201 [ default = 0 ];

下面對各個引數進行解釋：

- segmentation：是否分割，預設為否，假設影象的分割模板在segs/目錄
- multfact：將ground truth中的關節乘以這個multfact，就是影象中的位置，影象中的位置除以這個就是關節的位置，預設是1，也就是說關節的座標與影象的座標是一致大小的
- num_channels：影象的channel數，預設是3
- batchsize：batch大小
- root_img_dir：存放影象檔案的根目錄
- random_crop：是否需要隨機crop影象（如果true則做隨機crop，否則做中心crop）
- sample_per_cluster：影象取樣的型別（是否均勻地在clusters上取樣）
- labelinds：類標索引（只使用迴歸變數才設定這個）
- source：存放打亂檔案順序之後的檔案路徑的txt檔案
- meanfile：平均值檔案路徑
- crop_meanfile：crop之後的平均值檔案路徑
- cropsize：crop的大小
- outsize：預設是0（就是crop出來之後的影象會縮放的因子，0表示不縮放）
- scale：預設是1，實際上就是一系列預處理（去均值、crop、縮放）之後的畫素值乘以該scale得到最終的影象
- label_width：heatmap的寬
- label_height：heatmap的高
- dont_flip_first：不要對調第一個關節的位置，預設是true
- angle_max：對影象進行旋轉的最大角度，用於增強資料的，預設是0度
- flip_joint_labels：預設是true（即水平翻轉，將左右的關節對調）

為了保證完整性，把英文解釋全部列出：

- visualise: show visualisations for crops, rotations etc (recommended for testing)
- source: label file
- root_img_dir: directory with images (recommend you store images on ramdisk)
- meanfile: proto file containing the mean image(s) to be subtracted (optional)
- cropsize: size of random crop (randomly cropped from the original image)
- outsize: size that crops are resized to
- multfact: label coordinates in the ground truth text file are multiplied by this (default 1)
- sample_per_cluster: sample evenly across clusters
- random_crop: do random crop (if false, do center crop)
- label_height/width: height/width of regressed heatmap (must match net config)
- segmentation: segment images on the fly (assumes images are in a segs/ directory)
- angle_max: max rotation angle for training augmentation
- flip_joint_labels: when horizontally flipping images for augmentation, if this is set to true the code also swaps left<->right labels (this is important e.g. for observer-centric pose estimation). This assumes that the left,right joint labels are listed consecutively (e.g. wrist_left,wrist_right,elbow_left,elbow_right)
- dont_flip_first: This option allows you to turn off label mirroring for the first label. E.g. for labels head,wrist_right,wrist_left,elbow_right,elbow_left,shoulder_right,shoulder_left, the first joint is head and should not be swapped with wrist_right.

（3）這樣，你就可以在proto中配置你自己層的引數了

下面給出一個配置heatmapdata層的例項：

# Example configuration of the heatmap data layer for the TRAIN phase.
# Protobuf text format only recognises '#' as a comment marker.
layer {
  name: "data"
  type: "DataHeatmap"  # the layer type is DataHeatmap
  top: "data"
  top: "label"
  visualise: false  # whether to show visualisations
  include: { phase: TRAIN }
  heatmap_data_param {
    source: "/data/tp/flic/train_shuffle.txt"
    root_img_dir: "/mnt/ramdisk/tp/flic/"
    batchsize: 14
    cropsize: 248
    outsize: 256
    sample_per_cluster: false
    random_crop: true
    label_width: 64
    label_height: 64
    segmentation: false
    flip_joint_labels: true
    dont_flip_first: true
    angle_max: 40
    multfact: 1  # set to 282 if using preprocessed data from website
  }
}

（4）heatmapdata層的實現

1）在介紹實現之前需要給出我們的訓練資料的樣子

看完引數，我們看一下訓練的資料的格式，感性理解一下。下面給出一個樣例：

train/FILE.jpg 123,144,165,123,66,22 372.296,720,1,480,0.53333 0

下面對樣例做出解釋，引數之間是以空格分隔：

- 第一個引數是影象的路徑：train/FILE.jpg
- 第二個引數是關節座標：123,144,165,123,66,22
- 第三個引數是crop和scale的引數，分別為x_left,x_right,y_left,y_right,scaling_fact：372.296,720,1,480,0.53333
  注意：第三個引數的crop的座標其實上針對的是mean影象的，在mean影象中進行crop，然後放大到與原始影象一樣大小，然後原始影象減去經過crop且放大之後的mean影象。這樣在對原始影象進行crop的時候就不用擔心了。
- 第四個引數是是否cluster，是否均勻地在訓練中取樣影象：0

This is a space-delimited file where

- the first arg is the path to your image
- the second arg is a comma-delimited list of (x,y) coordinates you wish to regress (the coordinates in the train/FILE.jpg image space)
- the third arg is a comma-delimited list of crops & scaling factors of the input image (in order x_left,x_right,y_left,y_right,scaling_fact). Note: These crop & scaling factors are only used to crop the mean image. You can set these to 0 if you aren't using a mean image (for mean subtraction).
- the fourth arg is a coordinate 'cluster' (from which you have the option to evenly sample images in training). You can set this to 0.

2）在講解該層如何實現之前首先介紹點預備知識：

①首先給出在opencv中如何crop一幅影象

// You mention that you start with a CVMat* imagesource
CVMat * imagesource;
// Transform it into the C++ cv::Mat format
cv::Mat image(imagesource);
// Setup a rectangle to define your region of interest
cv::Rect myROI(10, 10, 100, 100);
// Crop the full image to that image contained by the rectangle myROI
// Note that this doesn't copy the data
cv::Mat croppedImage = image(myROI);

②如何進行隨機crop以及中心crop

上圖中的黃色邊框表示影象，藍色邊框表示x_border = x-cropsize以及y_border = y-cropsize大小的crop區域。如果隨機crop，則表示從[0,x_border-1]以及[0,y_border-1]大小的區域（也就是圖中的藍色矩形框內）隨機採集一個點座標作為crop的左上角的點，然後以cropsize為邊長取一個正方形。如果是中心crop，則取圖中兩個虛線的交點，即藍色矩形的中心座標作為crop的左上角的點，然後以cropsize為邊長取一個正方形。

3）我們所寫的層應該繼承哪個基類

我們所寫的HeatmapData層是繼承自BasePrefetchingDataLayer的（在檔案data_layers.hpp中），下面給出其定義：

// Base class for data layers that prefetch batches; derives from
// BaseDataLayer<Dtype> and InternalThread. Concrete layers implement
// load_batch() to fill one Batch.
template <typename Dtype>
class BasePrefetchingDataLayer :
    public BaseDataLayer<Dtype>, public InternalThread {
 public:
  explicit BasePrefetchingDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden.
  void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Prefetches batches (asynchronously if to GPU memory)
  static const int PREFETCH_COUNT = 3;

 protected:
  virtual void InternalThreadEntry();
  // Pure virtual: each derived layer supplies the code that fills `batch`.
  virtual void load_batch(Batch<Dtype>* batch) = 0;

  // Fixed pool of PREFETCH_COUNT batches plus two queues of batch pointers.
  Batch<Dtype> prefetch_[PREFETCH_COUNT];
  BlockingQueue<Batch<Dtype>*> prefetch_free_;
  BlockingQueue<Batch<Dtype>*> prefetch_full_;

  Blob<Dtype> transformed_data_;
};

4）實現自己的層 首先定義層的標頭檔案

#ifndef CAFFE_HEATMAP_HPP_
#define CAFFE_HEATMAP_HPP_
#include "caffe/layer.hpp"
#include <vector>
#include <boost/timer/timer.hpp>
#include <opencv2/core/core.hpp>
#include "caffe/common.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/filler.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe
{
// Heatmap data layer; derives from BasePrefetchingDataLayer.
// Reports the layer type "DataHeatmap", takes no bottom blobs and exposes
// exactly two top blobs.
template<typename Dtype>
class DataHeatmapLayer: public BasePrefetchingDataLayer<Dtype>
{
public:
    explicit DataHeatmapLayer(const LayerParameter& param)
        : BasePrefetchingDataLayer<Dtype>(param) {}
    virtual ~DataHeatmapLayer();

    virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                const vector<Blob<Dtype>*>& top);

    virtual inline const char* type() const { return "DataHeatmap"; }
    virtual inline int ExactNumBottomBlobs() const { return 0; }
    virtual inline int ExactNumTopBlobs() const { return 2; }

protected:
    // Virtual function that actually reads one batch of data into `batch`.
    virtual void load_batch(Batch<Dtype>* batch);

    // The helpers below are the layer's own functions; per the original
    // author's note they are all called from load_batch.

    // Filename of current image
    inline void GetCurImg(string& img_name, std::vector<float>& img_class, std::vector<float>& crop_info, int& cur_class);
    inline void AdvanceCurImg();

    // Visualise point annotations
    inline void VisualiseAnnotations(cv::Mat img_annotation_vis, int numChannels, std::vector<float>& cur_label, int width);

    // Random number generator
    inline float Uniform(const float min, const float max);

    // Rotate image for augmentation
    inline cv::Mat RotateImage(cv::Mat src, float rotation_angle);

    // Global vars
    shared_ptr<Caffe::RNG> rng_data_;
    shared_ptr<Caffe::RNG> prefetch_rng_;
    vector<std::pair<std::string, int> > lines_;
    int lines_id_;
    int datum_channels_;
    int datum_height_;
    int datum_width_;
    int datum_size_;
    int num_means_;
    int cur_class_;
    vector<int> labelinds_;
    vector<cv::Mat> mean_img_;
    bool sub_mean_;  // true if the mean should be subtracted
    bool sample_per_cluster_;  // sample separately per cluster?
    string root_img_dir_;
    vector<float> cur_class_img_;  // current class index
    int cur_img_;  // current image index
    vector<int> img_idx_map_;  // current image indices for each class

    // array of lists: one list of image names per class
    vector< vector< pair<string, pair<vector<float>, pair<vector<float>, int> > > > > img_list_;

    // vector of (image, label) pairs
    vector< pair<string, pair<vector<float>, pair<vector<float>, int> > > > img_label_list_;
};
}
#endif /* CAFFE_HEATMAP_HPP_ */

在介紹詳細實現之前先口述一下實現的流程：

1）首先在SetUp函式中讀取proto中的引數，從而獲得一批資料的大小、heatmap的長和寬、對影象進行切割的大小，以及切割後的影象需要縮放到多大，還有就是是否需要對每個類別的影象進行取樣、放置影象的根目錄等資訊。此外還讀取每個影象檔案的路徑、關節的座標位置、crop的位置、是否進行取樣。如果在每個類上進行取樣，還會生成一個數組，該陣列對應的是影象的類別索引與影象的索引之間的對映。此外還從檔案中讀取每個視訊的mean，然後將所讀取的mean放到vector容器中，便於在讀取資料的時候從影象中取出mean。最後還會設定top的形狀。

2）在load_batch這個函式中就是真正地讀取資料，並且對資料進行預處理。預處理主要是：是否對影象進行分割，對平均值影象進行切割，並將切割的影象塊放大到影象的大小，然後用影象減去該段視訊切割並放大的平均值影象（你會不會覺得很奇怪，我也覺得很奇怪。。。竟然是切割平均值影象的，然後放大到與原影象一樣的大小，然後再用原影象減去該均值影象，主要是原理我沒想明白）。

#include <fstream> // NOLINT(readability/streams)
#include <iostream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
#include <stdint.h>
#include <cmath>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/highgui/highgui_c.h>
#include <opencv2/imgproc/imgproc.hpp>
#include "caffe/layers/data_heatmap.hpp"
#include "caffe/util/benchmark.hpp"
#include <unistd.h>
namespace caffe
{
// Destructor: calls StopInternalThread() on destruction.
// The destructor is named with the plain class name (no <Dtype> after the
// '~'), because a template-id in that position is rejected under C++20.
template <typename Dtype>
DataHeatmapLayer<Dtype>::~DataHeatmapLayer() {
  this->StopInternalThread();
}
// 讀取引數檔案中的一些資料什麼的，然後初始化
template<typename Dtype>
void DataHeatmapLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
HeatmapDataParameter heatmap_data_param = this->layer_param_.heatmap_data_param();
// Shortcuts
// 類標索引字串（也就是關節型別？）
const std::string labelindsStr = heatmap_data_param.labelinds();
// batchsize
constint batchsize = heatmap_data_param.batchsize();
// heatmap的寬度
constint label_width = heatmap_data_param.label_width();
// heatmap的高度
constint label_height = heatmap_data_param.label_height();
// crop的大小
constint size = heatmap_data_param.cropsize();
// crop之後再次進行resize之後的大小
constint outsize = heatmap_data_param.outsize();
// label的batchsize
constint label_batchsize = batchsize;
// 每個cluster都要進行取樣
sample_per_cluster_ = heatmap_data_param.sample_per_cluster();
// 存放影象檔案的根路徑
root_img_dir_ = heatmap_data_param.root_img_dir();
// initialise rng seed
const unsigned int rng_seed = caffe_rng_rand();
srand(rng_seed);
// get label inds to be used for training
// 載入類標索引
std::istringstream labelss(labelindsStr);
LOG(INFO) << "using joint inds:";
while (labelss)
{
std::string s;
if (!std::getline(labelss, s, ',')) break;
labelinds_.push_back(atof(s.c_str()));
LOG(INFO) << atof(s.c_str());
}
// load GT
// shuffle file
// 載入ground truth檔案，即關節座標檔案
std::string gt_path = heatmap_data_param.source();
LOG(INFO) << "Loading annotation from " << gt_path;
std::ifstream infile(gt_path.c_str());
string img_name, labels, cropInfos, clusterClassStr;
if (!sample_per_cluster_)// 是否根據你指定的類別隨機取影象
{
// sequential sampling
// 檔名，關節位置座標，crop的位置，是否均勻地在clusters上取樣
while (infile >> img_name >> labels >> cropInfos >> clusterClassStr)
{
// read comma-separated list of regression labels
// 讀取關節位置座標
std::vector <float> label;
std::istringstream ss(labels);
int labelCounter = 1;
while (ss)
{
// 讀取一個數字
std::string s;
if (!std::getline(ss, s, ',')) break;
// 是否是類標索引中的值
// 如果labelinds為空或者為不為空在其中找到
if (labelinds_.empty() || std::find(labelinds_.begin(), labelinds_.end(), labelCounter) != labelinds_.end())
{
label.push_back(atof(s.c_str()));
}
labelCounter++;// 個數
}
// read cropping info
// 讀取crop的資訊
std::vector <float> cropInfo;
std::istringstream ss2(cropInfos);
while (ss2)
{
std::string s;
if (!std::getline(ss2, s, ',')) break;
cropInfo.push_back(atof(s.c_str()));
}
int clusterClass = atoi(clusterClassStr.c_str());
// 影象路徑，關節座標，crop資訊、類別
img_label_list_.push_back(std::make_pair(img_name, std::make_pair(label, std::make_pair(cropInfo, clusterClass))));
}
// initialise image counter to 0
cur_img_ = 0;
}
else
{
// uniform sampling w.r.t. classes
// 根據類別均勻取樣
// 也就是說影象有若干個類別，然後每個類別下有若干個影象
// 隨機取其中一個影象
while (infile >> img_name >> labels >> cropInfos >> clusterClassStr)
{
// 獲得你指定的類別
// 如果你制定為0
int clusterClass = atoi(clusterClassStr.c_str());
// 那麼
if (clusterClass + 1 > img_list_.size())
{
// expand the array
img_list_.resize(clusterClass + 1);
}
// read comma-separated list of regression labels
// 讀取關節的座標位置到label這個vector
std::vector <float> label;
std::istringstream ss(labels);
int labelCounter = 1;
while (ss)
{
std::string s;
if (!std::getline(ss, s, ',')) break;
if (labelinds_.empty() || std::find(labelinds_.begin(), labelinds_.end(), labelCounter) != labelinds_.end())
{
label.push_back(atof(s.c_str()));
}
labelCounter++;
}
// read cropping info
// 讀取crop資訊到cropinfo這個vector
std::vector <float> cropInfo;
std::istringstream ss2(cropInfos);
while (ss2)
{
std::string s;
if (!std::getline(ss2, s, ',')) break;
cropInfo.push_back(atof(s.c_str()));
}
// 每個clusterClass下都是一個vector，用於裝各種影象
img_list_[clusterClass].push_back(std::make_pair(img_name, std::make_pair(label, std::make_pair(cropInfo, clusterClass))));
}// while結尾
// 影象的類別個數
constint num_classes = img_list_.size();
// init image sampling
cur_class_ = 0;
// cur_class_img_中存放的是某個類別中隨機取到的影象的索引值
cur_class_img_.resize(num_classes);
// init image indices for each c

caffe 實戰系列：如何寫自己的資料層（以Deep Spatial Net為例）

一、前言

二、具體做法

（1）首先需要在caffe.proto中宣告自己所寫的層使用引數是可選的：

（2）然後在caffe.proto中下面的位置加入你自己的層的引數：

（3）這樣，你就可以在proto中配置你自己層的引數了

（4）heatmapdata層的實現

caffe 實戰系列：如何寫自己的資料層（以Deep Spatial Net為例）

Caffe實戰系列：實現自己Caffe網路層

caffe學習系列：訓練自己的圖片集（超詳細教程）

資料探勘入門系列教程（三）之scikit-learn框架基本使用（以K近鄰演算法為例）

統計學習三：2.K近鄰法代碼實現（以最近鄰法為例）

python中如何建立資料夾（以丟擲異常為依據）

詳解如何用爬蟲採集視訊播放量資料（以騰訊視訊為例）

在大資料浪潮下如何高效的獲取跨境電商銷售資料（以亞馬遜為例）

3. CKeditor+ckfinder ---CKFinder原始碼修改上傳自定義資料夾名（以時間年月YYYYMM為例）

基本資料型別和包裝類的關係（以int和Integer為例）

Python爬蟲專案實戰3 | 圖片文字識別（以驗證碼識別為例）

1.1.14 新增自己編寫的巨集程式碼（以程式碼行編號為例）

網路資料抓取及其R實現（以鏈家樓盤為例）

【14】Caffe學習系列：計算圖片資料的均值

Caffe實戰系列：最簡潔的Caffe安裝教程(以ubuntu14.04為例)

caffe 實戰系列：proto檔案格式以及含義解析：如何定義網路，如何設定網路引數(以AlexNet為例) 2016.3.30

linux驅動由淺入深系列：usb子系統之四（android平臺滑鼠驅動程式碼分析）

將Maven工程匯出war包（匯出可執行檔案） war包的安裝與部署（以兩個Tomcat為例，詳細）請到：

斷電，軟體崩潰，系統中毒，未點選儲存，就關閉導致資料丟失，以word檔案檔案為例，如何找回

Redis 分散式鎖：樂觀鎖的實現，以秒殺系統為例

caffe 實戰系列：如何寫自己的資料層（以Deep Spatial Net為例）

一、前言

二、具體做法

（1）首先需要在caffe.proto中宣告自己所寫的層使用引數是可選的：

（2）然後在caffe.proto中下面的位置加入你自己的層的引數：

（3）這樣，你就可以在proto中配置你自己層的引數了

（4）heatmapdata層的實現

相關推薦