dlib實現人臉識別(一)生成描述檔案和標籤檔案
阿新 • • 發佈:2018-12-04
#include <iostream> #include <dlib/dnn.h> #include <dlib/data_io.h> #include <dlib/image_processing.h> #include <dlib/gui_widgets.h> #include <dlib/dnn.h> #include <dlib/gui_widgets.h> #include <dlib/clustering.h> #include <dlib/string.h> #include <dlib/image_io.h> #if 0 #include <opencv2/opencv.hpp> #endif #include <vector> #include <dlib/opencv.h> #include <opencv2/highgui/highgui.hpp> #include <dlib/image_processing/frontal_face_detector.h> #include <dlib/image_processing/render_face_detections.h> #include <dlib/image_processing.h> #include <dlib/image_processing/frontal_face_detector.h> using namespace std; using namespace dlib; using namespace cv; // ---------------------------------------------------------------------------------------- template <long num_filters, typename SUBNET> using con5d = con<num_filters, 5, 5, 2, 2, SUBNET>; template <long num_filters, typename SUBNET> using con5 = con<num_filters, 5, 5, 1, 1, SUBNET>; template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16, SUBNET>>>>>>>>>; template <typename SUBNET> using rcon5 = relu<affine<con5<45, SUBNET>>>; using net_type = loss_mmod<con<1, 9, 9, 1, 1, rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>; template <template <int, template<typename>class, int, typename> class block, int N, template<typename>class BN, typename SUBNET> using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>; template <template <int, template<typename>class, int, typename> class block, int N, template<typename>class BN, typename SUBNET> using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>; template <int N, template <typename> class BN, int stride, typename SUBNET> using block = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>; template <int N, typename SUBNET> using ares = relu<residual<block, N, affine, SUBNET>>; template <int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>; template <typename SUBNET> using alevel0 = ares_down<256, SUBNET>; template <typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>; template <typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>; template <typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>; template <typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>; using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything< alevel0< alevel1< alevel2< alevel3< alevel4< max_pool<3, 3, 2, 2, relu<affine<con<32, 7, 7, 2, 2, input_rgb_image_sized<150> >>>>>>>>>>>>; /* matrix<rgb_pixel> img; cv::Mat image = cv::imread(path); array2d< bgr_pixel> arrimg(image.rows, image.cols); dlib::assign_image(img, cv_image<rgb_pixel>(image)); */ std::vector<matrix<rgb_pixel>> jitter_image( const matrix<rgb_pixel>& img ) { // All this function does is make 100 copies of img, all slightly jittered by being // zoomed, rotated, and translated a little bit differently. They are also randomly // mirrored left to right. thread_local dlib::rand rnd; std::vector<matrix<rgb_pixel>> crops; for (int i = 0; i < 100; ++i) crops.push_back(jitter_image(img, rnd)); return crops; } void listFiles(const char * dir, std::vector<string> &vfile) { using namespace std; HANDLE hFind; WIN32_FIND_DATA findData; LARGE_INTEGER size; char dirNew[100]; // 向目錄加萬用字元,用於搜尋第一個檔案 strcpy(dirNew, dir); strcat(dirNew, "\\*.*"); hFind = FindFirstFile(dirNew, &findData); do { // 是否是資料夾,並且名稱不為"."或".." if (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY != 0 && strcmp(findData.cFileName, ".") != 0 && strcmp(findData.cFileName, "..") != 0 ) { // 將dirNew設定為搜尋到的目錄,並進行下一輪搜尋 strcpy(dirNew, dir); strcat(dirNew, "\\"); strcat(dirNew, findData.cFileName); //listFiles(dirNew); } else { size.LowPart = findData.nFileSizeLow; size.HighPart = findData.nFileSizeHigh; //cout << findData.cFileName << "\t" << size.QuadPart << " bytes\n"; vfile.push_back(findData.cFileName); } } while (FindNextFile(hFind, &findData)); FindClose(hFind); return; } Mat MyResizeImage(Mat pSrc, double dScale) { IplImage *pImgSrc = &IplImage(pSrc); CvSize size; size.width = pImgSrc->width*dScale; size.height = pImgSrc->height*dScale; IplImage *pDes = cvCreateImage(size, pImgSrc->depth, pImgSrc->nChannels); cvResize(pImgSrc, pDes, CV_INTER_CUBIC); Mat matDes = cvarrToMat(pDes, true); cvReleaseImage(&pDes); return matDes; } int main(int argc, char*argv[]) { try { //建立人臉檢測物件 net_type net; deserialize("./mmod_human_face_detector.dat") >> net; //建立人臉特徵點檢測物件 shape_predictor sp; deserialize("shape_predictor_68_face_landmarks.dat") >> sp; //建立人臉識別物件 anet_type facerec; deserialize("dlib_face_recognition_resnet_model_v1.dat") >> facerec; //載入需要識別的圖片物件 std::vector<string> arrFiles; listFiles("./images/", arrFiles); std::vector<string>::iterator it_begin = arrFiles.begin(); std::vector<string>::iterator it_end = arrFiles.end(); //人臉描述佇列 std::vector<matrix<float, 0, 1>> arrSerialize; //標籤佇列 std::vector<string> arrLabel; //顯示視窗 image_window img_win; for (; it_begin != it_end; ++it_begin) { string ext = strrchr((*it_begin).c_str(), '.'); cout << ext.c_str() << endl; if (ext == ".jpg") { string strTmpJpg = "./images/"; string strJpgName = (*it_begin).substr(0, (*it_begin).rfind(".")); strTmpJpg += (*it_begin).c_str(); //get image cv::Mat tempimg = imread(strTmpJpg.c_str()); cv::Mat image2 = MyResizeImage(tempimg, 0.5); cv_image<bgr_pixel> cimg(image2); matrix<rgb_pixel> img; dlib::assign_image(img, cimg); //檢測畫面中的人臉 auto dets = net(img); std::vector<matrix<rgb_pixel>> faces; std::vector<full_object_detection> shapes; for (auto&& d : dets) { // get the landmarks for this human's face auto shape = sp(img, d.rect); //獲取人臉區域68個特徵點 matrix<rgb_pixel> face_chip; extract_image_chip(img, get_face_chip_details(shape, 150, 0.25), face_chip); faces.push_back(move(face_chip)); shapes.push_back(shape); } if (faces.size() == 0) { cout << "No faces found in image!" << endl; return 1; } //獲取人臉描述 std::vector<matrix<float, 0, 1>> face_descriptors = facerec(faces); std::vector<sample_pair> edges; for (size_t i = 0; i < face_descriptors.size(); ++i) { for (size_t j = i; j < face_descriptors.size(); ++j) { if (length(face_descriptors[i] - face_descriptors[j]) < 0.6) edges.push_back(sample_pair(i, j)); } } std::vector<unsigned long> labels; const auto num_clusters = chinese_whispers(edges, labels); cout << "number of people found in the image: " << num_clusters << endl; std::vector<image_window> win_clusters(num_clusters); img_win.set_title("img"); img_win.clear_overlay(); img_win.set_image(img); img_win.add_overlay(render_face_detections(shapes)); for (size_t cluster_id = 0; cluster_id < num_clusters; ++cluster_id) { std::vector<matrix<rgb_pixel>> temp; for (size_t j = 0; j < labels.size(); ++j) { if (cluster_id == labels[j]) temp.push_back(faces[j]); } win_clusters[cluster_id].set_title("face cluster " + cast_to_string(cluster_id)); win_clusters[cluster_id].set_image(tile_images(temp)); } arrSerialize.push_back(face_descriptors[0]); arrLabel.push_back(strJpgName); } } serialize("assigner.dat") << arrSerialize; serialize("label.dat") << arrLabel; } catch (exception& e) { cout << e.what() << endl; } }