
OpenPose training code (1)

OpenPose itself is quite sprawling: it bundles body pose estimation, hand keypoint estimation, face keypoint extraction, and even 3D pose, all built as a wrapping layer on top of caffe. If we actually want to use it, though, much of that is unnecessary, e.g. OpenPose's multithreading, the GUI, and so on; we only need to focus on the core pieces.
Here we only care about the body keypoint estimation in OpenPose. As the previous post showed, Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields is essentially CPM (CVPR 2016) plus PAFs. Inference is straightforward: extract keypoint candidates, score candidate limbs with a line integral over the PAFs, then assign the keypoints to groups (i.e., decide which ones belong to the same person) to obtain multi-person poses.
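To make the grouping step concrete, here is a minimal sketch (my own illustration, not code from the paper or the repo) of the PAF line-integral score: for two candidate keypoints, sample the predicted 2D vector field along the segment between them and average its dot product with the segment's unit direction; a high score means the two candidates very likely belong to the same limb of the same person.

import numpy as np

def paf_limb_score(paf_x, paf_y, p1, p2, n_samples=10):
    # paf_x, paf_y: the two predicted channels (H x W arrays) of one limb's PAF
    # p1, p2: (x, y) pixel coordinates of two candidate keypoints
    p1 = np.asarray(p1, dtype=float)
    p2 = np.asarray(p2, dtype=float)
    direction = p2 - p1
    norm = np.linalg.norm(direction)
    if norm < 1e-6:
        return 0.0
    direction = direction / norm
    # sample the field at evenly spaced points on the segment p1 -> p2
    xs = np.round(np.linspace(p1[0], p2[0], n_samples)).astype(int)
    ys = np.round(np.linspace(p1[1], p2[1], n_samples)).astype(int)
    sampled = np.stack([paf_x[ys, xs], paf_y[ys, xs]], axis=1)
    # mean projection onto the limb direction = a discretized line integral
    return float(np.mean(sampled.dot(direction)))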

The training code mostly comes down to data preparation and data loading, spread across a few files.
Data-loading files:

cpm_data_layer.cpp
cpm_data_transformer.cpp

Data-preparation files:

genCOCOMask.m
genJSON.m
genLMDB.py
getANNO.m

cpm_data_layer and cpm_data_transformer are both implemented inside caffe. To make sense of those two files, we first have to look at how the data preparation works. Here I only cover how the LMDB file is generated, since everything else is fairly simple and can be read on your own (generating the LMDB is simple enough too, but the author's code for this part is somewhat messy and takes some patience to untangle).
In genLMDB.py, the preprocessed data is written into an LMDB. The core is the function writeLMDB, which packs each sample row by row and plane by plane (a "plane" here can be understood as a channel, since the reader later walks them with pointer offsets):

import json
import os
import struct

import cv2
import lmdb
import numpy as np

import caffe


def float2bytes(floats):
    # helper defined earlier in genLMDB.py: pack a float or a list of
    # floats into a string of 4-byte (native-endian) IEEE floats
    if type(floats) is float:
        floats = [floats]
    return struct.pack('%sf' % len(floats), *floats)


def writeLMDB(datasets, lmdb_path, validation):
    # create an empty directory for the LMDB first; it needs roughly 140 GB
    env = lmdb.open(lmdb_path, map_size=int(1e12))
    txn = env.begin(write=True)
    data = []
    numSample = 0

    for d in range(len(datasets)):
        if datasets[d] == "MPI":
            print datasets[d]
            with open('MPI.json') as data_file:
                data_this = json.load(data_file)
                data_this = data_this['root']
                data = data + data_this
            numSample = len(data)
            print numSample
        elif datasets[d] == "COCO":
            # read the JSON annotation file
            print datasets[d]
            with open('dataset/COCO/json/COCO.json') as data_file:
                data_this = json.load(data_file)
                data_this = data_this['root']
                data = data + data_this
            numSample = len(data)
            print numSample

    random_order = np.random.permutation(numSample).tolist()
    isValidationArray = [data[i]['isValidation'] for i in range(numSample)]
    if validation == 1:
        totalWriteCount = isValidationArray.count(0.0)
    else:
        totalWriteCount = len(data)
    print totalWriteCount
    writeCount = 0

    for count in range(numSample):
        idx = random_order[count]
        if data[idx]['isValidation'] != 0 and validation == 1:
            print '%d/%d skipped' % (count, idx)
            continue

        if "MPI" in data[idx]['dataset']:
            path_header = 'dataset/MPI/images/'
        elif "COCO" in data[idx]['dataset']:
            path_header = '/proj/Sunjiarui/fcm_pose_train/training/dataset/COCO/images/'
        img = cv2.imread(os.path.join(path_header, data[idx]['img_paths']))
        img_idx = data[idx]['img_paths'][-16:-3]

        # mask_all / mask_miss exist because some (small) people are present
        # in the image but carry no annotation; they must be masked out
        if "COCO_val" in data[idx]['dataset']:
            mask_all = cv2.imread(path_header + 'mask2014/val2014_mask_all_' + img_idx + 'png', 0)
            mask_miss = cv2.imread(path_header + 'mask2014/val2014_mask_miss_' + img_idx + 'png', 0)
        elif "COCO" in data[idx]['dataset']:
            mask_all = cv2.imread(path_header + 'mask2014/train2014_mask_all_' + img_idx + 'png', 0)
            mask_miss = cv2.imread(path_header + 'mask2014/train2014_mask_miss_' + img_idx + 'png', 0)
        elif "MPI" in data[idx]['dataset']:
            img_idx = data[idx]['img_paths'][-13:-3]
            mask_miss = cv2.imread('dataset/MPI/masks/mask_' + img_idx + 'jpg', 0)

        height = img.shape[0]
        width = img.shape[1]
        if width < 64:
            # pad only the width; height is left untouched to keep all information
            img = cv2.copyMakeBorder(img, 0, 0, 0, 64 - width,
                                     cv2.BORDER_CONSTANT, value=(128, 128, 128))
            cv2.imwrite('padded_img.jpg', img)
            width = 64

        # the metadata is packed into one extra uint8 channel, one row at a time
        meta_data = np.zeros(shape=(height, width, 1), dtype=np.uint8)
        clidx = 0  # current line (row) index
        # dataset name (string)
        for i in range(len(data[idx]['dataset'])):
            meta_data[clidx][i] = ord(data[idx]['dataset'][i])
        clidx = clidx + 1
        # image height (float), image width (float)
        height_binary = float2bytes(data[idx]['img_height'])
        for i in range(len(height_binary)):
            meta_data[clidx][i] = ord(height_binary[i])
        width_binary = float2bytes(data[idx]['img_width'])
        for i in range(len(width_binary)):
            meta_data[clidx][4 + i] = ord(width_binary[i])
        clidx = clidx + 1
        # (a) isValidation (uint8), numOtherPeople (uint8), people_index (uint8),
        #     annolist_index (float), writeCount (float), totalWriteCount (float)
        meta_data[clidx][0] = data[idx]['isValidation']
        meta_data[clidx][1] = data[idx]['numOtherPeople']
        meta_data[clidx][2] = data[idx]['people_index']
        annolist_index_binary = float2bytes(data[idx]['annolist_index'])
        for i in range(len(annolist_index_binary)):  # bytes 3..6
            meta_data[clidx][3 + i] = ord(annolist_index_binary[i])
        count_binary = float2bytes(float(writeCount))  # note: writeCount, not count!
        for i in range(len(count_binary)):
            meta_data[clidx][7 + i] = ord(count_binary[i])
        totalWriteCount_binary = float2bytes(float(totalWriteCount))
        for i in range(len(totalWriteCount_binary)):
            meta_data[clidx][11 + i] = ord(totalWriteCount_binary[i])
        nop = int(data[idx]['numOtherPeople'])
        clidx = clidx + 1
        # (b) objpos_x (float), objpos_y (float)
        objpos_binary = float2bytes(data[idx]['objpos'])
        for i in range(len(objpos_binary)):
            meta_data[clidx][i] = ord(objpos_binary[i])
        clidx = clidx + 1
        # (c) scale_provided (float)
        scale_provided_binary = float2bytes(data[idx]['scale_provided'])
        for i in range(len(scale_provided_binary)):
            meta_data[clidx][i] = ord(scale_provided_binary[i])
        clidx = clidx + 1
        # (d) joint_self (3*16 floats, 3 rows)
        joints = np.asarray(data[idx]['joint_self']).T.tolist()  # transpose to 3*16
        for i in range(len(joints)):
            row_binary = float2bytes(joints[i])
            for j in range(len(row_binary)):
                meta_data[clidx][j] = ord(row_binary[j])
            clidx = clidx + 1
        # (e) check nop and normalize the "other people" fields to lists
        if nop != 0:
            if nop == 1:
                joint_other = [data[idx]['joint_others']]
                objpos_other = [data[idx]['objpos_other']]
                scale_provided_other = [data[idx]['scale_provided_other']]
            else:
                joint_other = data[idx]['joint_others']
                objpos_other = data[idx]['objpos_other']
                scale_provided_other = data[idx]['scale_provided_other']
            # (f) objpos_other_x (float), objpos_other_y (float) (nop rows)
            for i in range(nop):
                objpos_binary = float2bytes(objpos_other[i])
                for j in range(len(objpos_binary)):
                    meta_data[clidx][j] = ord(objpos_binary[j])
                clidx = clidx + 1
            # (g) scale_provided_other (nop floats in one row)
            scale_provided_other_binary = float2bytes(scale_provided_other)
            for j in range(len(scale_provided_other_binary)):
                meta_data[clidx][j] = ord(scale_provided_other_binary[j])
            clidx = clidx + 1
            # (h) joint_others (3*16 floats, nop*3 rows)
            for n in range(nop):
                joints = np.asarray(joint_other[n]).T.tolist()  # transpose to 3*16
                for i in range(len(joints)):
                    row_binary = float2bytes(joints[i])
                    for j in range(len(row_binary)):
                        meta_data[clidx][j] = ord(row_binary[j])
                    clidx = clidx + 1
        # the metadata occupies 7 + 4*nop rows in total

        # the channel order written here must be remembered exactly: the C++
        # reader recovers each part via pointer offsets
        if "COCO" in data[idx]['dataset']:
            img4ch = np.concatenate((img, meta_data, mask_miss[..., None], mask_all[..., None]), axis=2)
        elif "MPI" in data[idx]['dataset']:
            img4ch = np.concatenate((img, meta_data, mask_miss[..., None]), axis=2)
        img4ch = np.transpose(img4ch, (2, 0, 1))

        datum = caffe.io.array_to_datum(img4ch, label=0)
        key = '%07d' % writeCount
        txn.put(key, datum.SerializeToString())
        if writeCount % 1000 == 0:
            txn.commit()
            txn = env.begin(write=True)
        print '%d/%d/%d/%d' % (count, writeCount, idx, numSample)
        writeCount = writeCount + 1

    txn.commit()
    env.close()
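Before moving on, it is worth sanity-checking what was written. The snippet below is my own sketch (not part of the repo): it reads one sample back and splits it into the channels written above, assuming the 6-channel COCO layout of image (3) + metadata (1) + mask_miss (1) + mask_all (1).

import lmdb
import caffe

env = lmdb.open('dataset/COCO/lmdb', readonly=True)  # example path
with env.begin() as txn:
    datum = caffe.proto.caffe_pb2.Datum()
    datum.ParseFromString(txn.get('0000000'))
    arr = caffe.io.datum_to_array(datum)  # shape (6, height, width)
    img = arr[0:3].transpose(1, 2, 0)     # the BGR image
    meta = arr[3]                         # the packed metadata rows
    mask_miss, mask_all = arr[4], arr[5]
    # row 0 of the metadata holds the dataset name as raw characters
    print ''.join(chr(c) for c in meta[0] if c != 0)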

After the Python code above has run, the LMDB file needed for training exists. To actually consume it, a new caffe data layer has to be written; for background on caffe data layers, see my earlier post: http://mp.blog.csdn.net/mdeditor/77987504
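For orientation, the layer is hooked into the training prototxt roughly as follows. This is a sketch from memory of the released training prototxt, using the hyperparameters referenced later in this post, so treat the exact field set as an assumption and check your own copy:

layer {
  name: "data"
  type: "CPMData"
  top: "data"
  top: "label"
  data_param {
    source: "lmdb"        # path to the LMDB generated above
    batch_size: 10
    backend: LMDB
  }
  cpm_transform_param {
    stride: 8             # label resolution = crop size / stride
    crop_size_x: 368
    crop_size_y: 368
    num_parts: 56         # 18 keypoint heatmaps + 38 PAF channels
  }
}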

Now for cpm_data_layer and cpm_data_transformer. cpm_data_layer is essentially just layer setup; the real data transformation all happens in cpm_data_transformer.
First, the setup function of cpm_data_layer (I may have tweaked a few small details of the code):

template <typename Dtype>
void CPMDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  cpm_data_transformer_.reset(
     new CPMDataTransformer<Dtype>(cpm_transform_param_, this->phase_));
  cpm_data_transformer_->InitRand();


  // Read a data point, and use it to initialize the top blob.
  Datum& datum = *(reader_.full().peek());
  LOG(INFO) << datum.height() << " " << datum.width() << " " << datum.channels();

  bool force_color = this->layer_param_.data_param().force_encoded_color();
  if ((force_color && DecodeDatum(&datum, true)) ||
      DecodeDatumNative(&datum)) {
    LOG(INFO) << "Decoding Datum";
  }

  // image
  const int crop_size = this->layer_param_.cpm_transform_param().crop_size();
  const int batch_size = this->layer_param_.data_param().batch_size();
  if (crop_size > 0) {
    // top[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
    // for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    //   this->prefetch_[i].data_.Reshape(batch_size, datum.channels(), crop_size, crop_size);
    // }
    // //this->transformed_data_.Reshape(1, 4, crop_size, crop_size);
    // this->transformed_data_.Reshape(1, 6, crop_size, crop_size);
  } 
  else {
    const int height = this->phase_ != TRAIN ? datum.height() :
      this->layer_param_.cpm_transform_param().crop_size_y();
    const int width = this->phase_ != TRAIN ? datum.width() :
      this->layer_param_.cpm_transform_param().crop_size_x();
    LOG(INFO) << "PREFETCH_COUNT is " << this->PREFETCH_COUNT;  // asynchronously if to GPU memory
    top[0]->Reshape(batch_size, datum.channels(), height, width);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].data_.Reshape(batch_size, datum.channels(), height, width);   // 10,6,368,368
    }
    //this->transformed_data_.Reshape(1, 4, height, width);
    this->transformed_data_.Reshape(1, datum.channels(), height, width);  // 1,6,368,368
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","              
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();   // 10,6,368,368

  // label
  if (this->output_labels_) {
    const int stride = this->layer_param_.cpm_transform_param().stride();  // 8, important
    const int height = this->phase_ != TRAIN ? datum.height() :
      this->layer_param_.cpm_transform_param().crop_size_y();
    const int width = this->phase_ != TRAIN ? datum.width() :
      this->layer_param_.cpm_transform_param().crop_size_x();

    int num_parts = this->layer_param_.cpm_transform_param().num_parts();  // 56
    top[1]->Reshape(batch_size, 2*(num_parts+1), height/stride, width/stride);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].label_.Reshape(batch_size, 2*(num_parts+1), height/stride, width/stride);  // 10,114,46,46
    }
    this->transformed_label_.Reshape(1, 2*(num_parts+1), height/stride, width/stride);  // 1,114,46,46
  }
}

This function mostly just reads a few hyperparameters and pins down the shapes of the output blobs.
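With the numbers quoted in the comments above (batch size 10, a 368×368 crop, stride 8, 56 parts), the shapes work out as follows. The factor 2*(num_parts+1) is worth pausing on: as far as I can tell from the transformer code, half of the 114 label channels are per-pixel loss weights derived from mask_miss and the other half are the actual heatmap/PAF targets, the +1 being the background channel.

batch_size = 10
crop_size = 368       # crop_size_x == crop_size_y
stride = 8
num_parts = 56        # 18 keypoint heatmaps + 38 PAF channels

grid = crop_size // stride            # 368 / 8 = 46
label_channels = 2 * (num_parts + 1)  # 114: one weight plane per target plane
print (batch_size, label_channels, grid, grid)  # (10, 114, 46, 46)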
The key piece is the load_batch function, of which here is an excerpt:

    // Apply data transformations (mirror, scale, crop...)
    timer.Start();
    const int offset_data = batch->data_.offset(item_id);
    const int offset_label = batch->label_.offset(item_id);
    this->transformed_data_.set_cpu_data(top_data + offset_data);
    this->transformed_label_.set_cpu_data(top_label + offset_label);
    if (datum.encoded()) {
      this->cpm_data_transformer_->Transform(cv_img, &(this->transformed_data_));
    } else {
      this->cpm_data_transformer_->Transform_nv(datum, 
        &(this->transformed_data_),
        &(this->transformed_label_), cnt);
      ++cnt;
    }
    // if (this->output_labels_) {
    //   top_label[item_id] = datum.label();
    // }
    trans_time += timer.MicroSeconds();
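The heavy lifting is hidden inside Transform_nv: it parses the packed metadata channel back out of the datum, applies the augmentations (scale, rotation, crop-pad, flip), and rasterizes the heatmap and PAF ground truth into transformed_label_. That transformer is what the next part will walk through.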