遞迴呼叫搜尋與多執行緒搜尋檔案的效率對比
阿新 • • 發佈:2019-01-05
先上結果:
多執行緒版本輸出:
used time is: 0.014919s
all thread is done
scan completed, there are 10740 file(s) and 211 document(s) total: 10951
遞迴呼叫版本輸出:
open ../.cache/dconf/ failed, Error: Permission denied
used time is: 0.007569s
scan completed, there are 10740 file(s) and 211 document(s) total: 10951
可以看出,在這次測試中遞迴呼叫版本的效率優於多執行緒版本(注意:兩個程式都以 clock() 計時,量到的是行程的 CPU 時間,多執行緒版本會把所有執行緒的 CPU 時間加總,並不等於實際經過的時間)。下面是兩個版本的原始碼。
多執行緒版本:
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <strings.h>
#include <iostream>
#include <vector>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
using namespace std;
// Element type of the shared work queue: one directory path per task.
typedef string TASKLISTTYPEDEF;
vector <TASKLISTTYPEDEF> g_task_list; ///> task list (directories waiting to be scanned); accessed under g_mutex by the workers
pthread_mutex_t g_mutex; ///> global mutex, initialised and destroyed in main()
const unsigned int g_max_jobs = 64; ///> maximum number of worker threads
/*
// Per-thread status record, indexed by worker id
// 0: thread is suspended (waiting for work)
// 1: thread is working
// 2: task finished
*/
int g_threads_statu[g_max_jobs] = {0 };
// Running totals; each worker adds its per-directory counts while holding g_mutex.
int g_total_file = 0;
int g_total_dir = 0;
// Forward declarations of the directory scanner and the worker thread entry point.
int find_dir_file(const string path, int &file_cnt, int &dir_cnt, vector <TASKLISTTYPEDEF> &new_task_list);
void *dir_proc_pthread(void *arg);
/**
 * Worker thread entry point.
 *
 * Repeatedly takes one directory from the shared task list, scans it with
 * find_dir_file(), then appends the sub-directories it found to the task
 * list and adds the per-directory counts to the global totals.
 *
 * Status values stored in g_threads_statu[id] (always written under g_mutex):
 *   0 - idle: the task list is empty but another worker is still scanning
 *   1 - working: this worker holds a directory
 *   2 - finished: no tasks are left and no other worker is scanning
 *
 * @param arg  Pointer to an int holding this worker's index into
 *             g_threads_statu; it must stay valid while the thread runs.
 * @return     Always NULL.
 */
void *dir_proc_pthread(void *arg)
{
    if (arg == NULL)
    {
        return NULL;
    }
    const int id = *(int *)arg;
    if (id < 0 || (unsigned int)id >= g_max_jobs)
    {
        return NULL; // would index outside g_threads_statu
    }

    string dir;
    vector <TASKLISTTYPEDEF> new_task_list;
    int dir_cnt = 0;
    int file_cnt = 0;

    while (true)
    {
        // Decide the next state under the lock and keep a private copy, so the
        // branches below never have to re-read the shared array.
        int status = 2;

        pthread_mutex_lock(&g_mutex);
        if (!g_task_list.empty())
        {
            // Take from the back: O(1), whereas erasing the front of a vector
            // shifts every remaining element. Traversal order does not affect
            // the totals.
            dir = g_task_list.back();
            g_task_list.pop_back();
            status = 1;
        }
        else
        {
            // Nothing queued. New tasks can only be produced by a worker that
            // is still scanning; if no OTHER worker is, the scan is complete.
            // This worker's own flag may still read 1 from its previous task,
            // so it is skipped.
            for (unsigned int i = 0; i < g_max_jobs; i++)
            {
                if ((int)i != id && 1 == g_threads_statu[i])
                {
                    status = 0;
                    break;
                }
            }
        }
        g_threads_statu[id] = status;
        pthread_mutex_unlock(&g_mutex);

        if (status == 2)
        {
            // Returning (rather than pthread_exit) lets the local string and
            // vector be destroyed through normal scope exit.
            return NULL;
        }
        if (status == 0)
        {
            usleep(1250); // back off briefly before polling the task list again
            continue;
        }

        // status == 1: scan the directory without holding the lock.
        find_dir_file(dir, file_cnt, dir_cnt, new_task_list);

        pthread_mutex_lock(&g_mutex);
        g_task_list.insert(g_task_list.end(), new_task_list.begin(), new_task_list.end());
        g_total_dir += dir_cnt;
        g_total_file += file_cnt;
        pthread_mutex_unlock(&g_mutex);
    }
}
/**
 * Scan a single directory (non-recursively).
 *
 * Counts the entries of `path` and collects the full paths of its
 * sub-directories so the caller can schedule them as new tasks.
 *
 * @param path           Directory to scan; a trailing '/' is optional.
 * @param file_cnt       Out: number of entries whose d_type is not DT_DIR.
 * @param dir_cnt        Out: number of sub-directories, excluding "." and "..".
 * @param new_task_list  Out: cleared, then filled with "<path>/<name>/" for
 *                       every sub-directory found.
 * @return               Always 0. If the directory cannot be opened, a message
 *                       is printed and the outputs are left zeroed/empty.
 *
 * NOTE(review): entries reported as DT_UNKNOWN (some filesystems do not fill
 * in d_type) are counted as files and never descended into - confirm whether
 * a stat() fallback is needed for the target filesystems.
 */
int find_dir_file(const string path, int &file_cnt, int &dir_cnt, vector <TASKLISTTYPEDEF> &new_task_list)
{
    file_cnt = 0;
    dir_cnt = 0;
    new_task_list.clear();

    DIR *dir = opendir(path.c_str());
    if (dir == NULL)
    {
        // The stream output below may modify errno, so save the opendir()
        // error and restore it right before perror().
        const int open_errno = errno;
        cout << "open " << path << " failed. ";
        fflush(stdout);
        errno = open_errno;
        perror("Error");
        return 0;
    }

    // Child paths are built as prefix + name + "/". Make sure the prefix ends
    // with a separator; otherwise "/tmp" + "a" would become "/tmpa/".
    string prefix = path;
    if (prefix.empty() || prefix[prefix.size() - 1] != '/')
    {
        prefix += '/';
    }

    for (struct dirent *entry = readdir(dir); entry != NULL; entry = readdir(dir))
    {
        if (DT_DIR != entry->d_type)
        {
            file_cnt++;
            continue;
        }
        // Skip the self and parent links so the scan never loops or climbs up.
        if (0 == strcmp(entry->d_name, ".") || 0 == strcmp(entry->d_name, ".."))
        {
            continue;
        }
        dir_cnt++;
        new_task_list.push_back(prefix + entry->d_name + "/");
    }

    closedir(dir);
    return 0;
}
/**
 * Program entry point of the multi-threaded scanner.
 *
 * Seeds the task list with the start directory (argv[1], or "./" when no
 * argument is given), starts up to g_max_jobs worker threads, waits for them,
 * and prints the elapsed processor time and the totals.
 *
 * @return 0 on success, 1 if no worker thread could be started.
 */
int main(int argc, char *argv[])
{
    // clock() reports processor time used by the process, so for this program
    // the figure is CPU time summed over all threads, not wall-clock time.
    double start = clock();
    string path;                              ///> start directory
    pthread_t threads_ids[g_max_jobs];        ///> ids of the started threads
    int thread_num_table[g_max_jobs] = {0};   ///> worker index passed to each thread
    unsigned int started = 0;                 ///> number of threads actually created

    // Use the directory given on the command line, otherwise the current one.
    if (argc > 1)
    {
        path = string(argv[1]);
    }
    else
    {
        path = string("./");
    }
    // Sub-directory paths are built by plain concatenation, so the root must
    // end with a separator.
    if (!path.empty() && path[path.size() - 1] != '/')
    {
        path += '/';
    }

    pthread_mutex_init(&g_mutex, NULL);
    g_task_list.push_back(path);

    // Prepare every worker's id and status BEFORE any thread exists; workers
    // read the whole status array, so writing it later would race with them.
    for (unsigned int i = 0; i < g_max_jobs; i++)
    {
        thread_num_table[i] = (int)i;
        g_threads_statu[i] = 0;
    }

    // Create the worker threads (the consumers of the task list).
    for (unsigned int i = 0; i < g_max_jobs; i++)
    {
        int rc = pthread_create(&threads_ids[started], NULL, &dir_proc_pthread, (void*)&thread_num_table[started]);
        if (rc != 0)
        {
            // Carry on with the workers that did start.
            cerr << "pthread_create failed: " << strerror(rc) << endl;
            break;
        }
        started++;
    }
    if (started == 0)
    {
        pthread_mutex_destroy(&g_mutex);
        return 1;
    }

    // Wait only for threads that were really created.
    for (unsigned int i = 0; i < started; i++)
    {
        pthread_join(threads_ids[i], NULL);
    }

    double end = clock();
    cout << "used time is: " << (end - start) / CLOCKS_PER_SEC << endl;
    cout << "all thread is done" << endl;
    // Print the scan result.
    cout << "scan completed, there are " << g_total_file << " file(s) and " << g_total_dir << " document(s)"
         << " total: " << g_total_file + g_total_dir << endl;

    pthread_mutex_destroy(&g_mutex);
    return 0;
}
//end of file
//end of file
遞迴呼叫版本:
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <time.h>
/* Totals accumulated by the recursive directory scan. */
typedef struct _FILEDOCNUM FILEDOCNUM;
struct _FILEDOCNUM {
    unsigned int filenum; /* entries that are not directories */
    unsigned int docnum;  /* sub-directories, excluding "." and ".." */
};

/* Program-wide counters; both start at zero. */
FILEDOCNUM filedocnum = { .filenum = 0, .docnum = 0 };
/**
 * Recursively count the files and directories below base_path.
 *
 * Every entry whose d_type is not DT_DIR increments filedocnum.filenum; every
 * sub-directory (other than "." and "..") increments filedocnum.docnum and is
 * then scanned recursively.
 *
 * @param base_path  Directory to scan; a trailing '/' is optional.
 * @return           Always 0. A directory that cannot be opened is reported
 *                   on stdout/stderr and skipped.
 *
 * NOTE(review): entries reported as DT_UNKNOWN (some filesystems do not fill
 * in d_type) are counted as files and not descended into - confirm whether a
 * stat() fallback is needed.
 */
int find_all_dir_file(const char *base_path)
{
    char enter_path[1024] = {0}; /* path of the sub-directory being entered */

    if (base_path == NULL)
    {
        return 0;
    }

    DIR *dir = opendir(base_path);
    if (dir == NULL)
    {
        /* printf/fflush may change errno, so keep the opendir() error and
         * restore it for perror(). */
        int open_errno = errno;
        printf("open %s failed, ", base_path);
        fflush(stdout);
        errno = open_errno;
        perror("Error");
        return 0;
    }

    /* Child paths are "<base><sep><name>/"; add the separator only when the
     * caller did not supply one, so "/tmp" + "a" does not become "/tmpa/". */
    size_t base_len = strlen(base_path);
    const char *sep = (base_len > 0 && base_path[base_len - 1] == '/') ? "" : "/";

    struct dirent *entry = NULL;
    while ((entry = readdir(dir)) != NULL)
    {
        if (DT_DIR != entry->d_type)
        {
            filedocnum.filenum++;
            continue;
        }
        /* Skip the self and parent links to avoid infinite recursion. */
        if (0 == strcmp(entry->d_name, ".") || 0 == strcmp(entry->d_name, ".."))
        {
            continue;
        }

        filedocnum.docnum++;

        /* Bounded formatting: a path that does not fit is reported and skipped
         * instead of overflowing the buffer. */
        int written = snprintf(enter_path, sizeof enter_path, "%s%s%s/",
                               base_path, sep, entry->d_name);
        if (written < 0 || (size_t)written >= sizeof enter_path)
        {
            fprintf(stderr, "skip %s%s%s/: path too long\n",
                    base_path, sep, entry->d_name);
            continue;
        }
        find_all_dir_file(enter_path);
    }

    closedir(dir);
    return 0;
}
/**
 * Program entry point of the recursive scanner.
 *
 * Scans argv[1] (or "./" when no argument is given) with find_all_dir_file()
 * and prints the processor time used and the totals from filedocnum.
 */
int main(int argc, char *argv[])
{
    double start = clock();
    /* Use the caller's string directly: no fixed-size copy that a long
     * argument could overflow, and argv[1] is only read when it exists. */
    const char *path = (argc > 1) ? argv[1] : "./";

    printf("begin to scan all file and path\r\n");
    find_all_dir_file(path);
    double end = clock();
    printf("used time is: %f\r\n", (end - start) / CLOCKS_PER_SEC);
    /* The counters are unsigned int, so they are printed with %u. */
    printf("scan completed, there are %u file(s) and %u document(s) total: %u\r\n",
           filedocnum.filenum, filedocnum.docnum, filedocnum.filenum + filedocnum.docnum);
    return 0;
}