Train C4: Real-time pedestrian detection models——C4行人檢測演算法訓練過程

阿新 • • 發佈：2018-12-05

1.樣本的準備

樣本可以使用之前訓練的模型進行檢測，通過OpenCV的imwrite將檢測結果截圖儲存，然後再人工篩選，這個功能我在C4-Real-time-pedestrian-detection工程裡面已經實現。也可以自己寫一個程式，手動截圖。將正樣本都裁剪成只包含一個人的影像，並歸一化到108*36（高108畫素、寬36畫素）的解析度大小；負樣本同樣要歸一化到108*36大小。

把已經準備好的樣本，檔名順序命名（如1.jpg、2.jpg、3.jpg……），然後將正樣本放入C4_SVM_Train_Data\PositiveSamples工程目錄下，負樣本放入C4_SVM_Train_Data\NegativeSamples工程目錄下，修改工程檔案C4_SVM_Train_Data.cpp中main函式裡正負樣本引數PosSampleNum、NegSampleNum，設定為實際使用的數量。編譯執行程式即可生成訓練所需的樣本檔案samples.txt。注意：下面的程式碼實際是從「..\pos-2\」和「..\neg-2\」目錄讀取正、負樣本，如果樣本放在其他目錄，需要相應修改generateTrainingData函式中的路徑。程式碼是參考Jianxin Wu的論文實現的，如下所示：

#include "stdafx.h"
#include<opencv2/core/core.hpp>
#include<opencv2/highgui/highgui.hpp>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/ml/ml.hpp>

#include<iostream>
#include<sstream>
#include<vector>
#include<string>
#include <fstream>
#include <bitset>

using namespace cv;
using namespace std;

void calcFeatures(const Mat &imgSrc, vector<float> &features);
void ComputeSobel(const Mat &gray_image, Mat &sobel_image);
void ComputeCT(const Mat &sobel_image, Mat &CT_image);
void generate_sample_list(int posNum, int negNum);
void generateTrainingData(int nClass, int nDims, int posNum, int negNum);

// Program entry point: configures the sample counts and the feature layout,
// then generates the training file via generateTrainingData().
// Edit PosSampleNum / NegSampleNum to match the number of images actually used.
int _tmain(int argc, _TCHAR* argv[])
{
	const int nClass = 2;		// total number of classes
	const int nDims = 6144;		// feature dimension per sample

	const int PosSampleNum = 5103;	// number of positive samples
	const int NegSampleNum = 3102;	// number of negative samples
	const int nSamples = PosSampleNum + NegSampleNum;	// total sample count (not passed on)

	generateTrainingData(nClass, nDims, PosSampleNum, NegSampleNum);

	waitKey(0);
	return 0;
}

void calcFeatures(const Mat &imgSrc, vector<float> &features)
{
	if (imgSrc.empty())
	{
		cout << "Invalid Input!" << endl;
		return ;
	}

	Mat gray_image(imgSrc.size(), CV_8UC1);
	cvtColor(imgSrc, gray_image, CV_BGR2GRAY);

	Mat sobel_image(gray_image.size(), CV_32FC1);
	ComputeSobel(gray_image, sobel_image);
	//imshow("Sobel-Image", sobel_image);

	Mat CT_feature_image(gray_image.size(), CV_32FC1);
	ComputeCT(sobel_image, CT_feature_image);
	//imshow("CT_feature",CT_feature_image);
	
	//檢測視窗的大小為36*108，然後將該檢測視窗劃分為4*9個block，每個block的大小是9*12
	//每相鄰的4個block作為一個super-block，用該super-block來提取CENTRIST(Ct_feature)特徵，
	//橫向移動步長為9，縱向移動步長為12，每個super-block橫向可以移動3下，縱向可以移動8下，
	//一個檢測視窗一共可以產生（9-1）*（4-1）= 8*3 = 24個super-block，
	//計算每個super-block的直方圖，統計[0-255]共256個特徵值每個值出現的次數，最終將生成256*24=6144維的特徵。
	int width = 36;
	int height = 108;
	int stepsize = 2;
	int baseflength = 256;	//[0-255]
	int xdiv = 9;
	int ydiv = 12;
	int EXT = 1;

	MatND hist;
	int hist_size[1];
	float hranges[2];
	const float* ranges[1];
	int channels[1];

	hist_size[0] = 256;
	hranges[0] = 0.0;
	hranges[1] = 255.0;
	ranges[0] = hranges;
	channels[0] = 0;

	for (int i = 0; i < height - ydiv; i += ydiv)
	{
		for (int j = 0; j < width - xdiv; j += xdiv)
		{
			Rect super_block_rect(j, i, 2 * xdiv, 2 * ydiv);
			Mat super_block_image = CT_feature_image(super_block_rect);
			calcHist(&super_block_image, 1, channels, Mat(), hist, 1, hist_size, ranges, true, false);
			for (int k = 0; k < 256; k++)
			{
				features.push_back(hist.at<float>(k));
			}
		}
	}
}

void ComputeSobel(const Mat &gray_image, Mat &sobel_image)
{
	for (int i = 1; i < gray_image.rows - 1; i++)
	{
		for (int j = 1; j < gray_image.cols - 1; j++)
		{
			int Gx = (int)gray_image.at<uchar>(i - 1, j - 1) * (-1)+
				(int)gray_image.at<uchar>(i - 1, j) * (-2)+
				(int)gray_image.at<uchar>(i - 1, j + 1) * (-1)+
				(int)gray_image.at<uchar>(i + 1, j - 1)+
				(int)gray_image.at<uchar>(i + 1, j) * 2+
				(int)gray_image.at<uchar>(i + 1, j + 1);

			int Gy = (int)gray_image.at<uchar>(i - 1, j - 1) * (-1)+
				(int)gray_image.at<uchar>(i, j - 1) * (-2)+
				(int)gray_image.at<uchar>(i + 1, j - 1) * (-1)+
				(int)gray_image.at<uchar>(i - 1, j + 1)+
				(int)gray_image.at<uchar>(i, j + 1) * 2+
				(int)gray_image.at<uchar>(i + 1, j + 1);

			float G = (float)(Gx * Gx + Gy * Gy);

			sobel_image.at<float>(i, j) = G;
		}
	}
}

// Census transform of `sobel_image`: each interior pixel gets an 8-bit code
// with one bit per 8-neighbour, set when centre <= neighbour.
//
// Neighbours are visited row by row, top-left first; the first neighbour maps
// to the most significant bit (0x80) and the bottom-right one to 0x01.
// Every bit is evaluated independently, so several bits can be set at once.
//
// Both images are accessed as float. Only rows 2..rows-3 and columns
// 2..cols-3 of `CT_image` are written; its two-pixel frame is left untouched.
void ComputeCT(const Mat &sobel_image, Mat &CT_image)
{
	static const int kRowOffset[8] = { -1, -1, -1,  0, 0,  1, 1, 1 };
	static const int kColOffset[8] = { -1,  0,  1, -1, 1, -1, 0, 1 };

	const int rowEnd = sobel_image.rows - 2;
	const int colEnd = sobel_image.cols - 2;

	for (int r = 2; r < rowEnd; ++r)
	{
		for (int c = 2; c < colEnd; ++c)
		{
			const float centre = sobel_image.at<float>(r, c);

			int code = 0;
			for (int k = 0; k < 8; ++k)
			{
				// Shift first so the earliest neighbour ends up in bit 7.
				code <<= 1;
				if (centre <= sobel_image.at<float>(r + kRowOffset[k], c + kColOffset[k]))
				{
					code |= 1;
				}
			}

			CT_image.at<float>(r, c) = static_cast<float>(code);
		}
	}
}

// Writes the file names "1.jpg" .. "<count>.jpg" to `path`, one per line,
// each terminated by "\r\n". The file is created or truncated. If it cannot
// be opened, an error is printed to stderr and nothing is written.
static void write_numbered_list(const char *path, int count)
{
	FILE *fp = fopen(path, "wb+");
	if (fp == NULL)
	{
		fprintf(stderr, "Failed to open %s for writing\n", path);
		return;
	}

	for (int i = 1; i <= count; i++)
	{
		fprintf(fp, "%d.jpg\r\n", i);
	}

	fclose(fp);
}

// Generates the two sample name lists in the current working directory:
//   PositiveSamplesList.txt : 1.jpg .. posNum.jpg
//   NegativeSamplesList.txt : 1.jpg .. negNum.jpg
// A count of zero or less produces an empty list file.
void generate_sample_list(int posNum, int negNum)
{
	write_numbered_list("PositiveSamplesList.txt", posNum);
	write_numbered_list("NegativeSamplesList.txt", negNum);
}

void generateTrainingData(int nClass, int nDims, int posNum, int negNum)
{
	int number = 0;
	int nCount = 0;
	Mat input_image;
	vector<float> features;
	vector<float> labels;

	generate_sample_list(posNum, negNum);//生成正負樣本檔名列表
	string ImgName;//圖片名(絕對路徑)
	ifstream finPos("PositiveSamplesList.txt");//正樣本圖片的檔名列表
	ifstream finNeg("NegativeSamplesList.txt");//負樣本圖片的檔名列表

	Mat sampleFeatureMat;//所有訓練樣本的特徵向量組成的矩陣，行數等於所有樣本的個數，列數等於HOG描述子維數	
	Mat sampleLabelMat;//訓練樣本的類別向量，行數等於所有樣本的個數，列數等於1；1表示有人，0表示無人

	for(int i = 0; i < posNum && getline(finPos,ImgName); i++)
	{
		ImgName = "..\\pos-2\\" + ImgName;//加上正樣本的路徑名
		input_image = imread(ImgName);//讀取圖片
		calcFeatures(input_image, features);
	}
	cout << "Finished processing positive samlpes !" << endl;

	for (int j = 0; j < negNum && getline(finNeg,ImgName); j++)
	{
		ImgName = "..\\neg-2\\" + ImgName;//加上正樣本的路徑名
		input_image = imread(ImgName);//讀取圖片
		calcFeatures(input_image, features);
	}
	cout << "Finished processing negative samlpes !" << endl;

	//write the feature data into a txt file, the format must refer to libliner's reference 
	FILE * fp;
	fp = fopen("samples.txt","wb+");;//建立一個txt檔案，用於寫入資料的，每次寫入資料追加到檔案尾

	for (int m = 0; m < (posNum + negNum); m++)
	{
		if (m < posNum)
		{
			int lable = 1;		//	positive sample lable 1
			fprintf(fp,"%d ",lable);
		}
		else
		{
			int lable = -1;		//	negative sample lable -1
			fprintf(fp,"%d ",lable); 
		}

		for(int n = 0; n < nDims; n++)
		{
			fprintf(fp,"%d:%f ",(n+1),features.at(m * nDims + n));
		}
		fprintf(fp,"\r\n");
	}

	cout << "Generate Training Data Complete!" << endl << endl;
}

2.訓練工具的準備

liblinear-2.20、libHIK-2.07、OpenCV 2.4.9。

liblinear-2.20直接解壓縮就可以使用。

libHIK-2.07解壓縮之後需要編譯，編譯過程可以參考libHIK-2.07\libHIK目錄中的libHIK_v2.pdf，在3.3 Installation in Windows 中有具體的說明。

首先，用記事本開啟libHIK-2.07\libHIK中的Makefile.win檔案，將第13行OPENCV_INCLUDE的條目分別修改為本機的（下面是我的）

D:\Program Files (x86)\opencv\build\include\opencv

D:\Program Files (x86)\opencv\build\include

然後，修改第14行的OPENCV_LIB條目為本機的（下面是我的）

D:\Program Files (x86)\opencv\build\x64\vc10\lib

接著，修改第80行的opencv_highgui231.lib為opencv_highgui249.lib

修改第80行的opencv_core231.lib為opencv_core249.lib

修改第80行的opencv_imgproc231.lib為opencv_imgproc249.lib

最後，開啟Visual Studio x64 Win64命令提示（2010），在命令列將目錄切換到libHIK-2.07\libHIK下，執行命令WinMake.bat 即可完成編譯。

3.訓練過程

首先，將步驟1中生成的samples.txt複製到liblinear-2.20\windows目錄中，然後在cmd命令列裡輸入命令：

train.exe -s 2 -e 0.000001 -B 1 samples.txt combined.txt.model

這樣就生成了第1個行人檢測模型。

然後，將步驟1中生成的samples.txt複製到libHIK-2.07\libHIK\windows目錄中，然後在cmd命令列裡輸入命令：

train_HIK.exe -s 1 -u 256 -e 0.000001 -B 1 samples.txt combined2.txt.model

這樣就生成了第2個行人檢測模型。

最後，將訓練好的2個模型複製到行人檢測演算法的工程資料夾下面，就可使用了。

Train C4: Real-time pedestrian detection models——C4行人檢測演算法訓練過程

Train C4: Real-time pedestrian detection models——C4行人檢測演算法訓練過程

C4: Real-time pedestrian detection——C4實時行人檢測演算法

YOLO(You Only Look Once):Real-Time Object Detection

論文閱讀筆記（六）Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

【Faster RCNN】《Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks》

《You Only Look Once: Unified, Real-Time Object Detection》論文筆記

YOLO前篇---Real-Time Grasp Detection Using Convolutional Neural Networks

論文閱讀筆記二十六：Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks（CVPR 2016）

You Only Look Once: Unified, Real-Time Object Detection 論文閱讀

Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

faced: CPU Real Time face detection using Deep Learning

論文閱讀：You Only Look Once: Unified, Real-Time Object Detection

【論文筆記】Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

【筆記】Faster-R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

經典計算機視覺論文筆記——《Robust Real-Time Face Detection》

[論文學習]《Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks 》

【翻譯】Faster-R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

CPU Real-time Face Detection and Alignment-68 using MTCNN

Real-Time Hotspot Detection in Amazon Kinesis Analytics

行人檢測論文筆記：Robust Real-Time Face Detection

Train C4: Real-time pedestrian detection models——C4行人檢測演算法訓練過程

相關推薦