1. 程式人生 > >遞迴調用搜索和多執行緒搜尋檔案效率對比

遞迴呼叫搜尋和多執行緒搜尋檔案效率對比

先上結果:
多執行緒版本輸出:

used time is: 0.014919s
all thread is done
scan completed, there are 10740 file(s) and 211 document(s) total: 10951

遞迴呼叫版本輸出:

open ../.cache/dconf/ failed, Error: Permission denied
used time is: 0.007569s
scan completed, there are 10740 file(s) and 211 document(s) total: 10951

從上面的輸出可以看出,遞迴呼叫版本的效率優於多執行緒版本。需要注意的是,兩個版本都使用 clock() 計時,量測到的是行程所消耗的 CPU 時間(多執行緒版本為所有執行緒的總和),而不是實際經過的時間。下面是兩個版本的原始程式。

多執行緒版本:

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <strings.h>
#include <iostream>
#include <string>
#include <vector>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>

/// A task is the path of one directory that still has to be scanned.
typedef std::string TASKLISTTYPEDEF;

std::vector<TASKLISTTYPEDEF> g_task_list;   ///< Pending directories (guarded by g_mutex)
pthread_mutex_t g_mutex;                    ///< Guards g_task_list, g_active_workers and the totals
pthread_cond_t g_task_cond;                 ///< Signalled whenever a worker finishes a directory
const unsigned int g_max_jobs = 64;         ///< Maximum number of worker threads
unsigned int g_active_workers = 0;          ///< Workers currently scanning a directory
int g_total_file = 0;                       ///< Number of non-directory entries found so far
int g_total_dir = 0;                        ///< Number of directories found so far

int find_dir_file(const std::string &path, int &file_cnt, int &dir_cnt,
                  std::vector<TASKLISTTYPEDEF> &new_task_list);
void *dir_proc_pthread(void *arg);

/// Worker thread entry point.
///
/// Repeatedly takes one directory from g_task_list, scans it with
/// find_dir_file(), then publishes the sub-directories it found and adds its
/// counts to the global totals. A worker with nothing to do blocks on
/// g_task_cond instead of polling. The scan is complete, and the worker
/// returns, once the task list is empty and no worker is still scanning
/// (only a scanning worker can add new tasks).
///
/// @param arg Unused.
/// @return Always NULL.
void *dir_proc_pthread(void *arg)
{
    (void)arg;

    std::string dir;
    std::vector<TASKLISTTYPEDEF> new_task_list;
    int dir_cnt = 0;
    int file_cnt = 0;

    pthread_mutex_lock(&g_mutex);
    while (true)
    {
        // Sleep while there is nothing to take but somebody may still add work.
        while (g_task_list.empty() && g_active_workers > 0)
        {
            pthread_cond_wait(&g_task_cond, &g_mutex);
        }
        if (g_task_list.empty())
        {
            break;  // No tasks and no active worker: everything has been scanned.
        }

        // Take from the back: O(1) on a vector, and the order is irrelevant for counting.
        dir = g_task_list.back();
        g_task_list.pop_back();
        ++g_active_workers;
        pthread_mutex_unlock(&g_mutex);

        // The directory scan itself runs without holding the lock.
        find_dir_file(dir, file_cnt, dir_cnt, new_task_list);

        pthread_mutex_lock(&g_mutex);
        g_task_list.insert(g_task_list.end(), new_task_list.begin(), new_task_list.end());
        g_total_dir += dir_cnt;
        g_total_file += file_cnt;
        --g_active_workers;
        // Wake the waiters: either new tasks are available or the scan may be over.
        pthread_cond_broadcast(&g_task_cond);
    }
    pthread_mutex_unlock(&g_mutex);
    return NULL;
}

/// Scans a single directory (non-recursively).
///
/// @param path          Directory to scan; a trailing '/' is optional.
/// @param file_cnt      Out: number of entries in @p path that are not directories.
/// @param dir_cnt       Out: number of sub-directories in @p path ("." and ".." excluded).
/// @param new_task_list Out: full path (with trailing '/') of every sub-directory found.
/// @return Always 0. If the directory cannot be opened, a message is printed
///         and all outputs are left at zero / empty.
int find_dir_file(const std::string &path, int &file_cnt, int &dir_cnt,
                  std::vector<TASKLISTTYPEDEF> &new_task_list)
{
    file_cnt = 0;
    dir_cnt = 0;
    new_task_list.clear();

    DIR *dir = opendir(path.c_str());
    if (dir == NULL)
    {
        // Save errno first: the stream output below may overwrite it before perror runs.
        const int saved_errno = errno;
        const std::string message = "open " + path + " failed. \n";
        std::cout << message << std::flush;
        errno = saved_errno;
        perror("Error");
        return 0;
    }

    // Child paths are built as prefix + name, so the prefix must end with '/'.
    std::string prefix = path;
    if (prefix.empty() || prefix[prefix.size() - 1] != '/')
    {
        prefix += '/';
    }

    struct dirent *entry = NULL;
    while ((entry = readdir(dir)) != NULL)
    {
        // Skip the self and parent links regardless of the reported type.
        if (0 == strcmp(entry->d_name, ".") || 0 == strcmp(entry->d_name, ".."))
        {
            continue;
        }

        bool is_dir = (DT_DIR == entry->d_type);
        if (DT_UNKNOWN == entry->d_type)
        {
            // Not every filesystem fills in d_type; ask the filesystem directly.
            // Symbolic links are not followed, so a link is never treated as a directory.
            struct stat st;
            if (0 == fstatat(dirfd(dir), entry->d_name, &st, AT_SYMLINK_NOFOLLOW))
            {
                is_dir = S_ISDIR(st.st_mode);
            }
        }

        if (is_dir)
        {
            dir_cnt++;
            new_task_list.push_back(prefix + entry->d_name + "/");
        }
        else
        {
            file_cnt++;
        }
    }

    closedir(dir);
    return 0;
}

/// Program entry point.
///
/// Scans the directory given as the first command-line argument (or "./" when
/// none is given) using up to g_max_jobs worker threads, then prints the time
/// used and the totals.
///
/// @return 0 on success, 1 if no worker thread could be created.
int main(int argc, char *argv[])
{
    // clock() reports CPU time consumed by the process, not elapsed wall-clock time.
    const double start = clock();

    const std::string path = (argc > 1 && argv[1] != NULL) ? argv[1] : "./";

    pthread_mutex_init(&g_mutex, NULL);
    pthread_cond_init(&g_task_cond, NULL);

    // Seed the task list before any worker starts so that no worker sees an
    // empty list with zero active workers and exits early.
    g_task_list.push_back(path);

    pthread_t threads_ids[g_max_jobs];
    unsigned int started = 0;
    for (unsigned int i = 0; i < g_max_jobs; i++)
    {
        const int rc = pthread_create(&threads_ids[started], NULL, &dir_proc_pthread, NULL);
        if (rc != 0)
        {
            // Carry on with the workers that did start; they can finish the scan.
            std::cerr << "pthread_create failed: " << strerror(rc) << std::endl;
            break;
        }
        ++started;
    }

    if (started == 0)
    {
        pthread_cond_destroy(&g_task_cond);
        pthread_mutex_destroy(&g_mutex);
        return 1;
    }

    // Only join the threads that were actually created.
    for (unsigned int i = 0; i < started; i++)
    {
        pthread_join(threads_ids[i], NULL);
    }

    const double end = clock();
    std::cout << "used time is: " << (end - start) / CLOCKS_PER_SEC << std::endl;
    std::cout << "all thread is done" << std::endl;

    // All workers have been joined, so the totals can be read without the lock.
    std::cout << "scan completed, there are " << g_total_file << " file(s) and "
              << g_total_dir << " document(s)"
              << " total: " << g_total_file + g_total_dir << std::endl;

    pthread_cond_destroy(&g_task_cond);
    pthread_mutex_destroy(&g_mutex);
    return 0;
}
//end of file

遞迴呼叫版本:

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <time.h>

/* Running totals of the scan. */
typedef struct _FILEDOCNUM{
    unsigned int filenum;   /* number of entries that are not directories */
    unsigned int docnum;    /* number of directories */
}FILEDOCNUM;

/* Totals accumulated by find_all_dir_file(); the caller resets them if needed. */
FILEDOCNUM filedocnum = {0, 0};

/*
 * Returns a newly allocated string "<dir_path>/<name>/", inserting the
 * separator after dir_path only when dir_path does not already end with '/'.
 * Returns NULL if memory cannot be allocated. The caller frees the result.
 */
static char *join_dir_path(const char *dir_path, const char *name)
{
    const size_t dir_len = strlen(dir_path);
    const char *sep = (dir_len > 0 && dir_path[dir_len - 1] != '/') ? "/" : "";
    /* +1 for the trailing '/', +1 for the terminating NUL. */
    const size_t total = dir_len + strlen(sep) + strlen(name) + 2;
    char *joined = (char *)malloc(total);

    if (joined != NULL)
    {
        snprintf(joined, total, "%s%s%s/", dir_path, sep, name);
    }
    return joined;
}

/*
 * Recursively counts the entries below base_path.
 *
 * Every sub-directory increments filedocnum.docnum and is descended into;
 * every other entry increments filedocnum.filenum. "." and ".." are skipped.
 * Symbolic links are counted as files and are not followed.
 *
 * base_path: directory to scan; a trailing '/' is optional.
 * Returns:   always 0. A directory that cannot be opened is reported on
 *            stdout/stderr and skipped.
 *
 * One directory stream stays open per recursion level while its children are
 * being scanned.
 */
int find_all_dir_file(const char *base_path)
{
    DIR *dir = opendir(base_path);
    if (dir == NULL)
    {
        /* Save errno first: printf/fflush may overwrite it before perror runs. */
        const int saved_errno = errno;
        printf("open %s failed, ", base_path);
        fflush(stdout);
        errno = saved_errno;
        perror("Error");
        return 0;
    }

    struct dirent *entry = NULL;
    while ((entry = readdir(dir)) != NULL)
    {
        /* Skip the self and parent links regardless of the reported type. */
        if (0 == strcmp(entry->d_name, ".") || 0 == strcmp(entry->d_name, ".."))
        {
            continue;
        }

        int is_dir = (DT_DIR == entry->d_type);
        if (DT_UNKNOWN == entry->d_type)
        {
            /* Not every filesystem fills in d_type; ask the filesystem directly
               without following symbolic links. */
            struct stat st;
            if (0 == fstatat(dirfd(dir), entry->d_name, &st, AT_SYMLINK_NOFOLLOW))
            {
                is_dir = S_ISDIR(st.st_mode);
            }
        }

        if (!is_dir)
        {
            filedocnum.filenum++;
            continue;
        }

        filedocnum.docnum++;

        /* The child path lives on the heap, so its length is not limited by a
           fixed-size buffer and each recursion level uses little stack. */
        char *enter_path = join_dir_path(base_path, entry->d_name);
        if (enter_path == NULL)
        {
            fprintf(stderr, "skip %s under %s: out of memory\n", entry->d_name, base_path);
            continue;
        }
        find_all_dir_file(enter_path);
        free(enter_path);
    }

    closedir(dir);
    return 0;
}

/*
 * Program entry point.
 *
 * Scans the directory given as the first command-line argument (or "./" when
 * none is given) with find_all_dir_file(), then prints the time used and the
 * totals collected in filedocnum.
 */
int main(int argc, char *argv[])
{
    /* clock() reports CPU time consumed by the process, not elapsed wall-clock time. */
    const double start = clock();

    /* Use the argument in place: copying it into a fixed-size buffer could overflow. */
    const char *path = (argc > 1 && argv[1] != NULL) ? argv[1] : "./";

    printf("begin to scan all file and path\r\n");
    find_all_dir_file(path);
    const double end = clock();
    printf("used time is: %f\r\n", (end - start) / CLOCKS_PER_SEC);
    /* The counters are unsigned int, so they are printed with %u. */
    printf("scan completed, there are %u file(s) and %u document(s) total: %u\r\n",
           filedocnum.filenum, filedocnum.docnum, filedocnum.filenum + filedocnum.docnum);
    return 0;
}