
Reading DispNet's Custom Caffe Layers (Part 2): DataAugmentation


This series of posts records my notes on the custom Caffe layers encountered while studying DispNet. This part covers the DataAugmentation layer, whose main job is to augment the input data as configured, mitigating overfitting from the data side. Updated 2018.10.25.


Detailed Functionality

Usage

Specifically, the DataAugmentation layer can be used in two ways:

First, the augmentation parameters are defined inside the layer. The layer then reads its own parameters at run time; there is one input (the data to be augmented) and there may be two outputs (the first is the augmented data, the second carries the layer's sampled parameters).

Second, no parameters are defined inside the layer and the augmentation parameters are supplied through an input. There are then two inputs (the first is the data to be augmented, the second holds the parameters) and only one output (the augmented data).

Functionality

DataAugmentation supports the following kinds of augmentation:

  1. Spatial transforms: mirroring, rotation, zoom, and translation (specified directly, or in the x or y direction only);
  2. Chromatic transforms: brightness, gamma, contrast, and color (a pixel-level sketch follows this list);
  3. Effect transforms: fog size, fog amount, motion-blur angle, motion-blur size, shadow angle, shadow distance, shadow strength, and noise;
  4. Chromatic eigen transform.
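To make the chromatic parameters concrete, here is a minimal sketch of applying brightness, gamma, and contrast to one normalized RGB pixel. The function ApplyChromatic is hypothetical and only illustrates the general technique; it is not the layer's actual implementation:

#include <algorithm>
#include <cmath>

// Hypothetical illustration of a chromatic transform on a single RGB pixel
// with values normalized to [0, 1]; not DispNet's actual code.
void ApplyChromatic(float* rgb, float brightness, float gamma, float contrast) {
  for (int c = 0; c < 3; ++c) {
    float v = std::pow(rgb[c], gamma);            // gamma correction
    v *= brightness;                              // brightness as a multiplier
    v = 0.5f + (v - 0.5f) * contrast;             // contrast about mid-gray
    rgb[c] = std::min(1.0f, std::max(0.0f, v));   // clamp back to [0, 1]
  }
}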

Invocation

Option 1: augmentation parameters defined in the layer

layer {
  name: "DataAugmentation"
  type: "DataAugmentation"
  bottom: "input_blob"
  top: "output_blob1"
  top: "output_blob2"
  propagate_down: false 
  augmentation_param {
    max_multiplier: 1
    augment_during_test: false
    recompute_mean: 1000
    mean_per_pixel: false
    translate {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    zoom {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0.2
      spread: 0.4
      prob: 1.0
    }
    squeeze {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0
      spread: 0.3
      prob: 1.0
    }
    lmult_pow {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: -0.2
      spread: 0.4
      prob: 1.0
    }
    lmult_mult {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0.0
      spread: 0.4
      prob: 1.0
    }
    lmult_add {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.03
      prob: 1.0
    }
    sat_pow {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    sat_mult {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: -0.3
      spread: 0.5
      prob: 1.0
    }
    sat_add {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.03
      prob: 1.0
    }
    col_pow {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    col_mult {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.2
      prob: 1.0
    }
    col_add {
      rand_type: "gaussian_bernoulli"
      exp: false
      mean: 0
      spread: 0.02
      prob: 1.0
    }
    ladd_pow {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    ladd_mult {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0.0
      spread: 0.4
      prob: 1.0
    }
    ladd_add {
      rand_type: "gaussian_bernoulli"
      exp: false
      mean: 0
      spread: 0.04
      prob: 1.0
    }
    col_rotate {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 1
      prob: 1.0
    }
    crop_width: 960
    crop_height: 256
    chromatic_eigvec: 0.51
    chromatic_eigvec: 0.56
    chromatic_eigvec: 0.65
    chromatic_eigvec: 0.79
    chromatic_eigvec: 0.01
    chromatic_eigvec: -0.62
    chromatic_eigvec: 0.35
    chromatic_eigvec: -0.83
    chromatic_eigvec: 0.44
    noise {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0.03
      spread: 0.03
      prob: 1.0
    }
  }
}
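Each sub-message above (translate, zoom, sat_pow, ...) describes one random coefficient. A plausible reading of its fields, sketched below with a hypothetical SampleCoeff helper (this interprets the proto fields and is not the layer's verbatim code): prob is the probability that the transform is applied at all; rand_type selects a uniform or Gaussian draw parameterized by mean and spread; and exp: true maps the drawn value through exp(), which is natural for multiplicative factors such as zoom.

#include <cmath>
#include <random>
#include <string>

// Hypothetical sketch of drawing one augmentation coefficient from
// (rand_type, exp, mean, spread, prob); not the layer's verbatim code.
float SampleCoeff(const std::string& rand_type, bool take_exp,
                  float mean, float spread, float prob, std::mt19937& rng) {
  std::bernoulli_distribution apply(prob);
  if (!apply(rng))
    return take_exp ? 1.0f : 0.0f;  // identity when the transform is skipped
  float v;
  if (rand_type == "uniform_bernoulli") {
    std::uniform_real_distribution<float> u(mean - spread, mean + spread);
    v = u(rng);
  } else {  // "gaussian_bernoulli"
    std::normal_distribution<float> g(mean, spread);
    v = g(rng);
  }
  return take_exp ? std::exp(v) : v;  // exp() turns an offset into a factor
}

Under this reading, the zoom block above (exp: true, mean: 0.2, spread: 0.4) would draw zoom factors from exp(U[-0.2, 0.6]), i.e. roughly 0.82x to 1.82x.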

Option 2: parameters supplied by another data augmentation layer

Here the coefficients sampled by one DataAugmentation layer (emitted through its second top, as in option 1) are fed in as the second bottom, so that several blobs, such as the two images of a stereo pair, receive identical transforms:

layer {
  name: "DataAugmentation"
  type: "DataAugmentation"
  bottom: "input_blob"
  bottom: "input_augmented_blob"
  top: "output_blob"
  propagate_down: false 
  propagate_down: false 
  augmentation_param {
    max_multiplier: 1
    augment_during_test: false
    recompute_mean: 1000
    mean_per_pixel: false
    crop_width: 960
    crop_height: 256
    chromatic_eigvec: 0.51
    chromatic_eigvec: 0.56
    chromatic_eigvec: 0.65
    chromatic_eigvec: 0.79
    chromatic_eigvec: 0.01
    chromatic_eigvec: -0.62
    chromatic_eigvec: 0.35
    chromatic_eigvec: -0.83
    chromatic_eigvec: 0.44
  }
}

data_augmentation_layer.hpp

Declares the variables and functions the layer needs. Because the layer operates on raw image data, backward computation is not supported:

virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Gradients cannot flow through the augmentation, so any request to
  // backpropagate is a fatal error.
  for (int i = 0; i < propagate_down.size(); i++)
    if (propagate_down[i])
      LOG(FATAL) << "DataAugmentationLayer cannot do backward.";
}

virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < propagate_down.size(); i++)
    if (propagate_down[i])
      LOG(FATAL) << "DataAugmentationLayer cannot do backward.";
}

data_augmentation_layer.cpp

The augmentation itself is computed only on the GPU, so the .cpp file defines just the reading of data and parameters plus the necessary shape setup; the actual augmentation is implemented in the .cu file.

LayerSetUp

Reads the layer's configuration from the prototxt file. Note that blobs_ here store state rather than learnable weights (a scalar iteration counter and, when recompute_mean is set, the data means), which is why every parameter is registered with lr_mult: 0 and decay_mult: 0 and is excluded from backpropagation.

template <typename Dtype>
void DataAugmentationLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top)
{
  // TODO This won't work when applying a net to images of size different from what the net was trained on
  aug_ = this->layer_param_.augmentation_param();  // read the augmentation parameters into aug_
  this->layer_param_.set_reshape_every_iter(false);
  LOG(WARNING) << "DataAugmentationLayer only runs Reshape on setup";
  if (this->blobs_.size() > 0)
    LOG(INFO) << "Skipping data mean blob initialization";
  else {
    if (aug_.recompute_mean()) {
      LOG(INFO) << "Recompute mean";
      this->blobs_.resize(3);
      // blobs_[1]: per-pixel mean; blobs_[2]: per-channel mean (both shaped
      // in Reshape). They are state, not weights, so lr_mult and decay_mult
      // are forced to zero.
      this->blobs_[1].reset(new Blob<Dtype>());
      this->layer_param_.add_param();
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
      this->blobs_[2].reset(new Blob<Dtype>());
      this->layer_param_.add_param();
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
    }
    else {
      LOG(INFO) << "Do not recompute mean";
      this->blobs_.resize(1);
    }
    // blobs_[0]: scalar counter used while accumulating the running mean.
    this->blobs_[0].reset(new Blob<Dtype>(1, 1, 1, 1));
    // Never backpropagate
    this->param_propagate_down_.resize(this->blobs_.size(), false);
    this->layer_param_.add_param();
    this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
    this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
    // Leftover debugging code from the original authors: dumps the layer
    // parameters to a hard-coded path.
    WriteProtoToTextFile(this->layer_param_, "/misc/lmbraid17/sceneflownet/dosovits/matlab/test/message.prototxt");
  } 
}

Reshape

Sets the output shapes and other required bookkeeping.

template <typename Dtype>
void DataAugmentationLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top)
{
    // Check that the number of input and output blobs is valid
    LOG(WARNING) << "Reshape of Augmentation layer should only be called once? Check this";
    CHECK_GE(bottom.size(), 1) << "Data augmentation layer takes one or two input blobs.";
    CHECK_LE(bottom.size(), 2) << "Data augmentation layer takes one or two input blobs.";
    CHECK_GE(top.size(), 1) << "Data augmentation layer outputs one or two output blobs.";
    CHECK_LE(top.size(), 2) << "Data augmentation layer outputs one or two output blobs.";

    // Read the input dimensions
    const int num = bottom[0]->num();
    const int channels = bottom[0]->channels();
    const int height = bottom[0]->height();
    const int width = bottom[0]->width();

    output_params_=(top.size()>1);
    input_params_=(bottom.size()>1);  // true when the params are supplied by another augmentation layer rather than defined in-layer
    aug_ = this->layer_param_.augmentation_param();
    discount_coeff_schedule_ = this->layer_param_.coeff_schedule_param();

    // If crop width and height are given, crop (the cropped size must not exceed the original); otherwise keep the input size.
    do_cropping_ = (aug_.has_crop_width() && aug_.has_crop_height());
    if (!do_cropping_)
    {
        cropped_width_ = width;
        cropped_height_ = height;
        LOG(WARNING) << "Please enter crop size if you want to perform augmentation";
    }
    else
    {
        cropped_width_ = aug_.crop_width();    CHECK_GE(width, cropped_width_)   << "crop width greater than original";
        cropped_height_ = aug_.crop_height();  CHECK_GE(height, cropped_height_) << "crop height greater than original";
    }

    // Set the output shape
    top[0]->Reshape(num, channels, cropped_height_, cropped_width_);

    // Determine the number of augmentation coefficients (the fields of AugmentationCoeff)
    AugmentationCoeff coeff;
    num_params_ = coeff.GetDescriptor()->field_count();

    // If this layer's parameters are provided by another augmentation layer,
    // size the coefficient blob after that (second) input blob
    if (input_params_) {  // two inputs: the second one carries the params
        LOG(INFO) << "Receiving " << num_params_ << " augmentation params";
        all_coeffs_.ReshapeLike(*bottom[1]);  // same dimensions as bottom[1]
    } else  // otherwise the coefficients are generated locally
        all_coeffs_.Reshape(num, num_params_, 1, 1); //create

    // If a second output was requested, it carries the augmentation parameters
    if (output_params_) {
        top[1]->ReshapeLike(all_coeffs_);
        LOG(INFO) << "Emitting " << num_params_ << " augmentation params";
    }

    // Per-batch cache of the spatial transformation matrices (and, below, the per-image coefficient structs)
    coeff_matrices_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tTransMat)));
    
    coeff_chromatic_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tChromaticCoeffs)));
    coeff_chromatic_eigen_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tChromaticEigenCoeffs)));
    coeff_effect_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tEffectCoeffs)));

    chromatic_eigenspace_.reset(new SyncedMemory(sizeof(typename AugmentationLayerBase<Dtype>::tChromaticEigenSpace)));

    // Data mean
    if (aug_.recompute_mean()) {  // the mean is recomputed online
      ones_.Reshape(1, 1, cropped_height_, cropped_width_);
      caffe_set(ones_.count(), Dtype(1), ones_.mutable_cpu_data());
      this->blobs_[1]->Reshape(1, channels, cropped_height_, cropped_width_);
      this->blobs_[2]->Reshape(1, channels, 1, 1);
    }
    else if(aug_.mean().size()==3 && !aug_.mean_per_pixel())
    {
      ones_.Reshape(1, 1, cropped_height_, cropped_width_);
      caffe_set(ones_.count(), Dtype(1), ones_.mutable_cpu_data());

      LOG(INFO) << "Using predefined per-pixel mean from proto";
      pixel_rgb_mean_from_proto_.Reshape(1,3,1,1);
      for(int i=0; i<3; i++)
          pixel_rgb_mean_from_proto_.mutable_cpu_data()[i]=aug_.mean().Get(i);
    }
    
    noise_.reset(new SyncedMemory(top[0]->count() / top[0]->num() * sizeof(Dtype)));

    *(this->blobs_[0]->mutable_cpu_data()) = 0;
    
}

data_augmentation_layer.cu

Defines the computation that runs on the GPU.

Forward_gpu

Before augmenting, the following checks are performed:

  1. Determine the input and output sizes;
  2. Check that the input and output num (batch size) match;
  3. Check the data for NaNs and abnormally large values (a sketch of this check follows the list).
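A minimal sketch of the check in step 3 (an assumption about its form, not the layer's verbatim code):

#include <cmath>

// Hypothetical step-3 sanity check: scan a raw buffer for NaNs or
// implausibly large values; the limit of 1e6 is an arbitrary choice here.
template <typename Dtype>
bool HasBadValues(const Dtype* data, int count, Dtype limit = Dtype(1e6)) {
  for (int i = 0; i < count; ++i)
    if (std::isnan(data[i]) || std::fabs(data[i]) > limit)
      return true;  // the caller can then LOG(FATAL)
  return false;
}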

Augmentation then proceeds (by design, data are only augmented when a crop size has been configured):

  1. If no augmentation parameters were supplied as input, generate a set according to the layer parameters and record the applied transforms in the log;
  2. Compute the transformation matrix from the coefficients (a sketch of steps 2 and 3 follows this list);
  3. Apply the transformation matrix to the data as required.
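Steps 2 and 3 amount to composing the sampled rotation, zoom, and translation into a single 2x3 affine matrix and, for each output pixel, sampling the input at the inversely mapped coordinate. The sketch below (hypothetical helpers, mirroring the idea behind tTransMat but not DispNet's actual code) shows the technique for one channel with nearest-neighbor sampling:

#include <cmath>

// Row-major 2x3 affine matrix mapping output pixels to input coordinates.
struct Affine { float a0, a1, a2, a3, a4, a5; };

// Compose rotation by 'angle' and scaling by 1/zoom about the crop center
// (cx, cy), followed by a translation (dx, dy). Hypothetical helper.
Affine ComposeTransform(float angle, float zoom, float dx, float dy,
                        float cx, float cy) {
  const float c = std::cos(angle) / zoom, s = std::sin(angle) / zoom;
  Affine t;
  t.a0 = c;  t.a1 = -s;  t.a2 = cx - c * cx + s * cy + dx;
  t.a3 = s;  t.a4 =  c;  t.a5 = cy - s * cx - c * cy + dy;
  return t;
}

// Warp one channel: each output pixel reads from its mapped input location
// (nearest neighbor); out-of-bounds reads produce zero.
void WarpChannel(const float* src, int w, int h, const Affine& t, float* dst) {
  for (int y = 0; y < h; ++y)
    for (int x = 0; x < w; ++x) {
      const int xs = static_cast<int>(t.a0 * x + t.a1 * y + t.a2 + 0.5f);
      const int ys = static_cast<int>(t.a3 * x + t.a4 * y + t.a5 + 0.5f);
      const bool inside = xs >= 0 && xs < w && ys >= 0 && ys < h;
      dst[y * w + x] = inside ? src[ys * w + xs] : 0.0f;
    }
}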

If no crop size is configured, the input is simply copied to the output.

Finally, the augmented data are normalized by subtracting the mean.
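When recompute_mean is set (e.g. recompute_mean: 1000 above), the mean being subtracted is maintained online: blobs_[0] counts how many batches have contributed and blobs_[1] holds the per-pixel mean. A plausible update rule, with hypothetical names, is a standard running average:

// Hypothetical running-mean update suggested by recompute_mean and the
// counter in blobs_[0]; not the layer's verbatim code. 'mean' holds the
// per-pixel mean over 'count' values, 'n' the number of batches seen.
void UpdateRunningMean(const float* batch_mean, float* mean, int count,
                       int* n, int recompute_mean) {
  if (*n >= recompute_mean) return;  // freeze the mean after N batches
  for (int i = 0; i < count; ++i)
    mean[i] = (mean[i] * (*n) + batch_mean[i]) / (*n + 1);
  ++(*n);
}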