【caffe】Caffe Structure Analysis (Part 2)
阿新 • Published 2019-01-04
This time, let's go through how Caffe does its solving, i.e. how training is driven.
Inside the member function Solve():
template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
  ......
  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  int start_iter = iter_;
  // start iterating
  Step(param_.max_iter() - iter_);
  ......
}
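For context, Solve() is what the training tool calls after constructing the solver. Below is a minimal usage sketch mirroring what tools/caffe.cpp's train() does; the file name solver.prototxt is a placeholder:

#include "caffe/caffe.hpp"

int main() {
  caffe::SolverParameter solver_param;
  caffe::ReadSolverParamsFromTextFileOrDie("solver.prototxt", &solver_param);
  // create the solver type named in the prototxt (SGD by default)
  boost::shared_ptr<caffe::Solver<float> > solver(
      caffe::SolverRegistry<float>::CreateSolver(solver_param));
  // pass a snapshot path to Solve() instead to resume via resume_file
  solver->Solve();
  return 0;
}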
Next, let's look at the implementation inside the Solver::Step() function:
template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
  // starting iteration count
  const int start_iter = iter_;
  // final iteration count
  const int stop_iter = iter_ + iters;
  // loop until the configured number of iterations has been run
  while (iter_ < stop_iter) {
    // zero the Blob gradient (diff) buffers in net_
    net_->ClearParamDiffs();
    ...
    // accumulate the loss and gradient
    Dtype loss = 0;
    for (int i = 0; i < param_.iter_size(); ++i) {
      // forward and backward passes, accumulating the loss
      loss += net_->ForwardBackward();
    }
    loss /= param_.iter_size();
    // to smooth the reported loss, average the most recent average_loss
    // values and store the result in the member smoothed_loss_
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    // apply the computed gradients to update the weights
    ApplyUpdate();
    // Increment the internal iter_ counter -- its value should always indicate
    // the number of times the weights have been updated.
    ++iter_;
  }
}
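Two details in Step() are worth spelling out. First, the iter_size loop accumulates gradients from iter_size forward/backward passes into the diff buffers before a single weight update, so the effective batch size is the batch size times iter_size. Second, UpdateSmoothedLoss() keeps a window of the most recent average_loss loss values (average_loss is read from the solver prototxt) and maintains their running mean in smoothed_loss_. A standalone sketch of that smoothing idea, with hypothetical names rather than Caffe's actual member layout:

#include <vector>

// illustrative sliding-window loss smoother, not Caffe's exact code
class LossSmoother {
 public:
  explicit LossSmoother(int window) : window_(window) {}
  void Update(float loss) {
    if (losses_.size() < static_cast<size_t>(window_)) {
      // window not yet full: grow it and fold the new value into the mean
      losses_.push_back(loss);
      smoothed_ += (loss - smoothed_) / losses_.size();
    } else {
      // window full: overwrite the oldest value and adjust the mean
      const size_t idx = next_++ % window_;
      smoothed_ += (loss - losses_[idx]) / window_;
      losses_[idx] = loss;
    }
  }
  float smoothed() const { return smoothed_; }
 private:
  int window_;
  size_t next_ = 0;
  float smoothed_ = 0;
  std::vector<float> losses_;
};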
Inside the while loop, the network class's member function Net::ForwardBackward() is called first to run the forward and backward passes and compute the loss:
Dtype ForwardBackward() {
  Dtype loss;
  // forward pass
  Forward(&loss);
  // backward pass
  Backward();
  return loss;
}
The Forward() function in turn calls ForwardFromTo():
template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(Dtype* loss) {
  if (loss != NULL) {
    *loss = ForwardFromTo(0, layers_.size() - 1);
  } else {
    ForwardFromTo(0, layers_.size() - 1);
  }
  return net_output_blobs_;
}
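The blobs returned here, net_output_blobs_, are the ones no layer consumes as input: typically the loss (and accuracy) blobs during training, or the prediction blobs of a deploy net. A short usage sketch, assuming a net built elsewhere as boost::shared_ptr<caffe::Net<float> > net:

float loss;
const std::vector<caffe::Blob<float>*>& out = net->Forward(&loss);
// read the first output blob, e.g. class probabilities in a deploy net
const float* result = out[0]->cpu_data();
for (int k = 0; k < out[0]->count(); ++k) {
  // ... consume result[k] ...
}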
And ForwardFromTo() calls each layer's Forward():
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  for (int i = start; i <= end; ++i) {
    // LOG(ERROR) << "Forwarding " << layer_names_[i];
    // forward pass of each individual layer
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }
  }
  return loss;
}
Although the base class Layer's Forward() is not a virtual function, it internally wraps the virtual functions Forward_cpu() and Forward_gpu(), corresponding to the CPU and GPU versions respectively. Forward_cpu() is a pure virtual function of the parent class Layer and must be overridden by every subclass, whereas the parent class implements Forward_gpu() as a direct call to Forward_cpu(), so overriding it is optional. In short, it is these two virtual functions that allow different layers to have different forward computations.
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Lock during forward to ensure sequential forward
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // call the subclass's Forward_cpu implementation
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}
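One subtlety in the wrapper above: the loss_weights it dots against the top data live in the top blob's diff, where net setup stores each loss layer's loss_weight, so caffe_cpu_dot(count, data, loss_weights) yields that blob's weighted loss contribution. As for the dispatch itself, here is a hedged sketch of the Forward_cpu() a subclass might supply, for a hypothetical elementwise layer that doubles its input (not an actual Caffe layer):

template <typename Dtype>
void ScaleByTwoLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  for (int i = 0; i < count; ++i) {
    // elementwise forward computation: y = 2x
    top_data[i] = Dtype(2) * bottom_data[i];
  }
}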
The backward pass function Backward() calls the BackwardFromTo(int start, int end) function:
template <typename Dtype>
void Net<Dtype>::Backward() {
  BackwardFromTo(layers_.size() - 1, 0);
}

template <typename Dtype>
void Net<Dtype>::BackwardFromTo(int start, int end) {
  CHECK_GE(end, 0);
  CHECK_LT(start, layers_.size());
  // propagate backwards layer by layer
  for (int i = start; i >= end; --i) {
    if (layer_need_backward_[i]) {
      // As with the forward pass, Backward() is not virtual, but it wraps
      // the virtual functions Backward_cpu() and Backward_gpu(), so each
      // layer type supplies its own computation.
      // Note that the backward pass takes one extra argument,
      // bottom_need_backward_: each layer checks which of its bottom blobs
      // actually need gradients and skips the work for those that do not.
      layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]);
      if (debug_info_) {
        BackwardDebugInfo(i);
      }
    }
  }
}
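Continuing the hypothetical ScaleByTwoLayer from above, its Backward_cpu() would apply the chain rule, and the propagate_down flags it receives are exactly the bottom_need_backward_ entries passed in by BackwardFromTo():

template <typename Dtype>
void ScaleByTwoLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // skip the work entirely if no bottom gradient is requested
  if (!propagate_down[0]) { return; }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const int count = bottom[0]->count();
  for (int i = 0; i < count; ++i) {
    // chain rule for y = 2x: dL/dx = 2 * dL/dy
    bottom_diff[i] = Dtype(2) * top_diff[i];
  }
}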
After the forward and backward passes finish, the SGDSolver::ApplyUpdate() member function is called to update the weights:
template <typename Dtype>
void SGDSolver<Dtype>::ApplyUpdate() {
  // get the current learning rate
  Dtype rate = GetLearningRate();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  // if the L2 norm of the gradients exceeds the clip_gradients threshold,
  // scale all gradients down so that their norm equals the threshold;
  // the threshold defaults to -1, which disables clipping
  ClipGradients();
  // update every learnable parameter blob in the network
  for (int param_id = 0; param_id < this->net_->learnable_params().size();
       ++param_id) {
    // normalize the accumulated gradient (divide by iter_size)
    Normalize(param_id);
    // add the weight-decay term (L2 regularization)
    Regularize(param_id);
    // compute the update value with stochastic gradient descent
    ComputeUpdateValue(param_id, rate);
  }
  // apply the updates to the weights
  this->net_->Update();
}
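For the plain SGD solver, ComputeUpdateValue() implements momentum: each parameter blob has a history buffer, updated as history = momentum * history + local_rate * diff (local_rate being the global rate scaled by the parameter's lr_mult), and the result is written back into diff so that Net::Update() can subtract it from the weights. A standalone sketch of that step, with plain loops in place of Caffe's BLAS wrappers caffe_cpu_axpby and caffe_axpy:

// illustrative SGD-with-momentum step for one parameter blob
void SgdUpdate(int count, float local_rate, float momentum,
               float* diff, float* history, float* data) {
  for (int i = 0; i < count; ++i) {
    // what SGDSolver::ComputeUpdateValue computes via caffe_cpu_axpby
    history[i] = momentum * history[i] + local_rate * diff[i];
    // what Net::Update applies after diff is overwritten by history
    data[i] -= history[i];
  }
}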
Finally, ++iter_ increments the iteration counter and the while loop continues until the requested number of iterations has been run.
That is the entire training process of the network.
Thanks to the blog column Rolin的專欄.