
[caffe] Caffe Structure Analysis (2)

This time let's look at how caffe actually solves (trains) a network.
Inside the member function Solver::Solve():

template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
  ......
  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  int start_iter = iter_;
  // Start iterating
  Step(param_.max_iter() - iter_);
  ......
}
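For orientation, this is roughly how a caller (such as the caffe train tool) ends up invoking Solve(). A minimal sketch assuming a recent BVLC caffe; the helper names (ReadSolverParamsFromTextFileOrDie, SolverRegistry) may differ in older trees:

// Minimal sketch, assuming a recent BVLC caffe.
#include <string>
#include "caffe/caffe.hpp"

void TrainFromPrototxt(const std::string& solver_prototxt) {
  caffe::SolverParameter solver_param;
  // Parse the solver configuration (net, max_iter, lr policy, ...) from disk.
  caffe::ReadSolverParamsFromTextFileOrDie(solver_prototxt, &solver_param);
  // Create the concrete solver (e.g. SGDSolver) through the registry.
  boost::shared_ptr<caffe::Solver<float> > solver(
      caffe::SolverRegistry<float>::CreateSolver(solver_param));
  // Solve() resumes at iter_ and calls Step(max_iter - iter_) as shown above.
  solver->Solve();
}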

Let's now look at the implementation of Solver::Step():

template <typename Dtype>
void Solver<Dtype>::Step(int iters)
{
    // Starting iteration count
    const int start_iter = iter_;
    // Stopping iteration count
    const int stop_iter = iter_ + iters;

    // Loop until the configured number of iterations has been reached
    while (iter_ < stop_iter)
    {
        // Zero the gradients (Blob diffs) of the parameters in net_
        net_->ClearParamDiffs();
        ...
        // accumulate the loss and gradient
        Dtype loss = 0;
        for (int i = 0; i < param_.iter_size(); ++i)
        {
            // Forward and backward pass, computing the loss
            loss += net_->ForwardBackward();
        }
        loss /= param_.iter_size();
        // To smooth the reported loss, average the most recent average_loss
        // values and store the result in the member smoothed_loss_
        UpdateSmoothedLoss(loss, start_iter, average_loss);
        // Update the weights (back-propagation step)
        ApplyUpdate();
        // Increment the internal iter_ counter -- its value should always indicate
        // the number of times the weights have been updated.
        ++iter_;
    }
}
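The smoothing mentioned above is just a sliding-window average over the last average_loss iterations. A simplified, self-contained sketch of the idea; the free-standing function name and signature below are purely illustrative:

#include <vector>

// Sketch: keep a window of the last `average_loss` losses and maintain
// their running mean in *smoothed (what caffe stores in smoothed_loss_).
void UpdateSmoothedLossSketch(float loss, int iter, int start_iter,
                              int average_loss,
                              std::vector<float>* losses, float* smoothed) {
  if (static_cast<int>(losses->size()) < average_loss) {
    // Window not full yet: grow it and update the mean incrementally.
    losses->push_back(loss);
    const int size = losses->size();
    *smoothed = (*smoothed * (size - 1) + loss) / size;
  } else {
    // Window full: overwrite the oldest entry and adjust the mean by the delta.
    const int idx = (iter - start_iter) % average_loss;
    *smoothed += (loss - (*losses)[idx]) / average_loss;
    (*losses)[idx] = loss;
  }
}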

Inside the while loop, the network class member Net::ForwardBackward() is called first to run the forward and backward passes and compute the loss:

  Dtype ForwardBackward() {
    Dtype loss;
    // Forward pass
    Forward(&loss);
    // Backward pass
    Backward();
    return loss;
  }

Forward() in turn calls ForwardFromTo():

template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(Dtype* loss) {
  if (loss != NULL) {
    *loss = ForwardFromTo(0, layers_.size() - 1);
  } else {
    ForwardFromTo(0, layers_.size() - 1);
  }
  return net_output_blobs_;
}

ForwardFromTo() then calls each layer's Forward():

template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  for (int i = start; i <= end; ++i) {
    // LOG(ERROR) << "Forwarding " << layer_names_[i];
    // Forward pass of each individual layer
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }
  }
  return loss;
}

Although the base class Layer's Forward() is not a virtual function, it wraps the virtual functions Forward_cpu() and Forward_gpu(), corresponding to the CPU and GPU versions respectively. Forward_cpu() is a pure virtual function of the parent class Layer and must be overridden by each subclass, whereas the parent class implements Forward_gpu() by simply calling Forward_cpu(), so overriding it is optional. In short, these two virtual functions are what allow different layers to have different forward computations (a minimal example of such an override is sketched after the wrapper code below).

// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Lock during forward to ensure sequential forward
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // Dispatch to the subclass's Forward_cpu()
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}
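To make the dispatch concrete, here is a minimal sketch of a hypothetical layer that only implements the CPU paths; the name MyScaleLayer and the member scale_ are made up for illustration, and a real layer would also register itself and provide a type() string:

#include "caffe/layer.hpp"

namespace caffe {

// Hypothetical layer that multiplies its input by a constant factor.
// Only the CPU methods are overridden; the inherited Forward_gpu() /
// Backward_gpu() fall back to these CPU implementations.
template <typename Dtype>
class MyScaleLayer : public Layer<Dtype> {
 public:
  explicit MyScaleLayer(const LayerParameter& param)
      : Layer<Dtype>(param), scale_(Dtype(2)) {}

  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    // Output has the same shape as the input.
    top[0]->ReshapeLike(*bottom[0]);
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();
    for (int i = 0; i < bottom[0]->count(); ++i) {
      top_data[i] = scale_ * bottom_data[i];
    }
  }

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    // Skip the gradient computation if the bottom blob does not need it.
    if (!propagate_down[0]) { return; }
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < bottom[0]->count(); ++i) {
      bottom_diff[i] = scale_ * top_diff[i];
    }
  }

 private:
  Dtype scale_;
};

}  // namespace caffe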

The backward pass, Net::Backward(), calls BackwardFromTo(int start, int end):

template <typename Dtype>  
void Net<Dtype>::Backward()  
{  
    BackwardFromTo(layers_.size() - 1, 0);  
} 
template <typename Dtype>  
void Net<Dtype>::BackwardFromTo(int start, int end)  
{  
  CHECK_GE(end, 0);  
  CHECK_LT(start, layers_.size());  
  // Propagate backward through the layers in reverse order
  for (int i = start; i >= end; --i)  
  {  
    if (layer_need_backward_[i])  
    {  
      // As in the forward pass, Backward() is not a virtual function, but it wraps the virtual
      // functions Backward_cpu() and Backward_gpu(), so different layers compute their gradients differently.
      // Note that the backward pass takes an extra argument, bottom_need_backward_: a layer that does
      // not need to back-propagate is skipped (it returns immediately).
      layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]);  
      if (debug_info_)  
      {  
        BackwardDebugInfo(i);  
      }  
    }  
  }  
}  
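For symmetry with the forward path, the Layer::Backward() wrapper called above is essentially just a dispatch on Caffe::mode(); roughly (quoted from memory of layer.hpp, so treat the exact body as approximate):

template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // Subclass-specific CPU backward pass
    Backward_cpu(top, propagate_down, bottom);
    break;
  case Caffe::GPU:
    // Subclass-specific GPU backward pass (defaults to the CPU path)
    Backward_gpu(top, propagate_down, bottom);
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}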

After the forward and backward passes finish, SGDSolver::ApplyUpdate() is called to update the weights.

template <typename Dtype>
void SGDSolver<Dtype>::ApplyUpdate()
{
    // Get the current learning rate
    Dtype rate = GetLearningRate();
    if (this->param_.display() && this->iter_ % this->param_.display() == 0)
    {
        LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
    }

    // If the gradient norm exceeds the threshold clip_gradients, the gradients are
    // scaled down so that the norm equals that threshold.
    // A threshold of -1 (the default) disables clipping.
    ClipGradients();

    // Update every learnable parameter blob in the network
    for (int param_id = 0; param_id < this->net_->learnable_params().size();
       ++param_id)
    {
        // Normalize (divide the accumulated gradient by iter_size)
        Normalize(param_id);
        // Regularize: add the weight-decay (e.g. L2) term
        Regularize(param_id);
        // Compute the update value via stochastic gradient descent
        ComputeUpdateValue(param_id, rate);
    }
    // Apply the updates to the weights
    this->net_->Update();
}
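For plain SGD, ComputeUpdateValue() boils down to a momentum update of each parameter blob's diff, which Net::Update() then subtracts from the weights. A simplified sketch with an explicit loop in place of the BLAS helpers caffe actually uses; the free-standing function below is purely illustrative:

// Simplified sketch of SGD-with-momentum for one learnable blob.
// local_rate = base learning rate * per-parameter lr multiplier;
// momentum comes from the solver prototxt; history holds the previous update.
void ComputeSgdUpdateSketch(int count, float local_rate, float momentum,
                            const float* grad, float* history, float* diff) {
  for (int i = 0; i < count; ++i) {
    // v <- momentum * v + local_rate * gradient
    history[i] = momentum * history[i] + local_rate * grad[i];
    // The blob's diff now holds the value Net::Update() will subtract: w <- w - v
    diff[i] = history[i];
  }
}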

Finally the iteration counter is incremented (++iter_) and the while loop continues until the requested number of iterations has been completed.
That is the entire training process of the network.
Thanks to Rolin's blog column.