1. 程式人生 > >linux下c/c++例項之七遞迴掃描目錄下的檔案

linux下c/c++例項之七遞迴掃描目錄下的檔案

一、簡介

      本文示範如何在 Linux 下遞迴掃描指定目錄下的所有檔案,以滿足更細緻的檔案操作需求。Qt、Boost 等函式庫也已提供類似的介面函式。

二、詳解

1、遞迴掃描檔案的程式碼

(1)scanfile.cpp:

#include <iostream>
#include <string>
#include <vector>
#include <sys/stat.h>
#include <regex.h>
#include <libgen.h>
#include <dirent.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
using namespace std;
vector<string> v_file;
/**
 * Tests `buffer` against the POSIX extended regular expression `pattern`,
 * ignoring case. The expression is not anchored, so a match anywhere inside
 * `buffer` counts.
 *
 * @param buffer  NUL-terminated text to search.
 * @param pattern NUL-terminated extended regular expression.
 * @return 0 when the pattern matches; -1 when it does not match or when the
 *         pattern fails to compile (the compile error is written to stderr).
 */
int regex_match(const char *buffer, const char *pattern)
{
    regex_t compiled;
    const int compile_rc = regcomp(&compiled, pattern, REG_EXTENDED | REG_ICASE);
    if (compile_rc != 0) {
        char message[1024] = {0};
        regerror(compile_rc, &compiled, message, sizeof(message));
        fprintf(stderr, "%s:regcom(%s)\n", message, pattern);
        return -1;
    }

    // One match slot is requested, although only the success/failure
    // status of regexec() is used.
    regmatch_t span[1] = {};
    const int exec_rc = regexec(&compiled, buffer, 1, span, 0);

    // The compiled expression is released on both outcomes.
    regfree(&compiled);
    return (exec_rc == 0) ? 0 : -1;
}
int scan_dirpath(char *path, char *pattern)    //遞迴掃描該目錄下所有的檔案和目錄
{
    char file_path[512] = {0};
    char file[512] = {0};
    DIR *dir = NULL;
    struct dirent *ptr = NULL;
    struct stat buf;
    int i, j;
    /****************瀏覽目錄***************************/
    if ((dir = opendir(path)) == NULL) {
        perror("opendir failed!");
        return -1;
    }
    while((ptr = readdir(dir)) != NULL) {
        if (ptr->d_name[0] != '.') {//除去根檔案目錄
            strcpy(file_path, path);
            if (path[strlen(path) - 1] != '/')  strcat(file_path, "/");
            strcat(file_path, ptr->d_name);          //構建完整的檔名
            assert(stat(file_path, &buf) != -1);
            if(S_ISREG(buf.st_mode)) {        //判斷的是檔案
                for(i = 0; i < strlen(file_path); i++) {
                    if(file_path[i] == '/') {
                        memset(file, 0, strlen(file));
                        j = 0;
                        continue;
                    }
                    file[j++] = file_path[i];
                }
                if (regex_match(file, pattern) == 0) {  //正則匹配成功
                    v_file.push_back(file_path);
                }
            }
            else if(S_ISDIR(buf.st_mode)) {   //判斷的是目錄
                scan_dirpath(file_path, pattern);
            }
        }
    }
    return 0;
}

/* Demo entry point: scans /tmp/other recursively and prints every collected path. */
int main()
{
    char root[512] = "/tmp/other";
    char name_regex[32] = ".*.cpp";

    scan_dirpath(root, name_regex);

    // Print the collected paths, one per line, in the order they were found.
    for (vector<string>::const_iterator it = v_file.begin(); it != v_file.end(); ++it) {
        cout << *it << endl;
    }
    return 0;
}

(2)編譯執行

g++ -o scanfile scanfile.cpp 
./scanfile

2、非遞迴掃描目錄檔案的C++模板

(1)scanfile.cpp:
#include <iostream>
#include <string>
#include <vector>
#include <regex.h>
#include <assert.h>
#include <sys/stat.h>
#include <dirent.h>
#include <algorithm>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
using namespace std;
// Record describing one file found during a directory scan.
struct scan_info
{
    std::string file_dir;     // directory in which the entry was found
    std::string file_name;    // entry name inside file_dir (no path component)
    int         create_time;  // timestamp of the entry; the scanner stores st_mtime here
};
// Ordering predicate for scan_info records, based on file_name only
// (create_time is not consulted).
//
// It returns true when lhs.file_name sorts after rhs.file_name. Used as the
// comparator of std::make_heap / std::pop_heap, this "greater than" ordering
// keeps the smallest name at the front of the heap, so files are popped in
// ascending name order.
class compare_name
{
public:
    // const-qualified so the predicate can also be invoked through a const
    // comparator object.
    bool operator()(const scan_info& lhs, const scan_info& rhs) const {
        return lhs.file_name > rhs.file_name;
    }
};
template <typename compare = compare_name>
class scan_file
{
public:
    // Scan file in single-directory mode.
    scan_file(const string& file_dir, const string& pattern, int file_count = 1024);
    // Scan file in multi-directory mode.
    scan_file(const vector<string>& dir_vector, const string& pattern, int file_count = 1024);
    // Scan file in dir/sub-dirs mode.
    scan_file(const string& dir, const vector<string>& sub_dirs, const string& pattern, int file_count = 1024);
    virtual ~scan_file();
public:
    // Get a file in given directories. Upon file found, return true, otherwise return false.
    // In single-directory mode, return file name, otherwise return full name.
    bool get_file(string& file_name);
    // Get all files in given directories.
    // In single-directory mode, return file name, otherwise return full name.
    void get_files(vector<string>& files);
private:
    vector<string> dir_vector;
    regex_t reg;
    vector<scan_info> file_vector;
};
template<typename compare> scan_file<compare>::scan_file(const string& file_dir, const string& pattern, int file_count)
    : dir_vector(1, file_dir)
{
    assert(regcomp(&reg, pattern.c_str(), REG_NOSUB | REG_EXTENDED) == 0);
    file_vector.reserve(file_count);
}
template<typename compare> scan_file<compare>::scan_file(const vector<string>& dir_vector_, const string& pattern, int file_count)
    : dir_vector(dir_vector_)
{
    // 以功能更加強大的擴充套件正則表示式的方式進行匹配,不用儲存匹配後的結果
    assert(regcomp(&reg, pattern.c_str(), REG_NOSUB | REG_EXTENDED) == 0);
    file_vector.reserve(file_count);
}
template<typename compare> scan_file<compare>::scan_file(const string& dir, const vector<string>& sub_dirs, const string& pattern, int file_count)
{
    vector<string>::const_iterator iter;
    for (iter = sub_dirs.begin(); iter != sub_dirs.end(); ++iter) {
        dir_vector.push_back(dir + '/' + *iter);
    }
    assert(regcomp(&reg, pattern.c_str(), REG_NOSUB | REG_EXTENDED) == 0);
    file_vector.reserve(file_count);
}
template<typename compare> scan_file<compare>::~scan_file()
{
    regfree(&reg);
}
template<typename compare> bool scan_file<compare>::get_file(string& file_name)
{
    /**先掃描目錄,將所有的檔案都寫入到vector中**/
    /**如果找到檔案,每次從vector中讀取一個檔案**/
    /**不能遞迴掃描,多檔案時會返回全路徑**/
    DIR* dirp;
    dirent ent;
    dirent* result;
    struct stat stat_buf;
    string full_name = "";
    scan_info file_info;
    file_name = "";
    while (file_vector.size() > 0) {
        vector<scan_info>::iterator iter = file_vector.begin();
        if (access((iter->file_dir + '/' + iter->file_name).c_str(), F_OK) == -1) {
            std::pop_heap(file_vector.begin(), file_vector.end(), compare());
            file_vector.pop_back();
            continue;
        }
        if (dir_vector.size() == 1)  file_name = iter->file_name;
        else  file_name = iter->file_dir + '/' + iter->file_name;
        std::pop_heap(file_vector.begin(), file_vector.end(), compare());
        file_vector.pop_back();
        return true;
    }
    vector<string>::const_iterator dir_iter;
    for (dir_iter = dir_vector.begin(); dir_iter != dir_vector.end(); ++dir_iter) {
        assert((dirp = opendir(dir_iter->c_str())) != NULL);
        while (readdir_r(dirp, &ent, &result) == 0 && result != 0) {
            if (strcmp(ent.d_name, ".") == 0 || strcmp(ent.d_name, "..") == 0)  continue;
            if (regexec(&reg, ent.d_name, (size_t)0, 0, 0) != 0)  continue;
            full_name = *dir_iter + '/' + ent.d_name;
            assert(::lstat(full_name.c_str(), &stat_buf) >= 0);
            if (S_ISDIR(stat_buf.st_mode) == 0) {
                file_info.file_dir = *dir_iter;
                file_info.file_name = ent.d_name;
                file_info.create_time = stat_buf.st_mtime;
                file_vector.push_back(file_info);
            }
        }
        closedir(dirp);
    }
    /**也可以採用檔案載入完畢後更改名字
    *err_msg << "mv " << m_real_file << " " << m_real_file << ".bak";
    *system(err_msg.str().c_str());
    **/
    if (dir_vector.size() > 0)  dir_vector.clear();
    if (file_vector.size() > 0) {
        //make_heap以迭代器[start,end] 區間內的元素生成一個堆. 預設使用元素型別 的 < 操作符 進行判斷堆的型別, 因此生成的是大頂堆. 這裡是小頂堆
        std::make_heap(file_vector.begin(), file_vector.end(), compare());
        while (file_vector.size() > 0) {
            vector<scan_info>::iterator iter = file_vector.begin();
            if (access((iter->file_dir + '/' + iter->file_name).c_str(), F_OK) == -1) { //檔案不存在
                //pop_heap() 並不是真的把最大(最小)的元素從堆中彈出來. 而是重新排序堆. 它把首元素和末元素交換,然後將[first,last-1]的資料再做成一個堆。
                std::pop_heap(file_vector.begin(), file_vector.end(), compare());
                file_vector.pop_back();
                continue;
            }
            if (dir_vector.size() == 1)  file_name = iter->file_name;
            else  file_name = iter->file_dir + '/' + iter->file_name;
            std::pop_heap(file_vector.begin(), file_vector.end(), compare());
            file_vector.pop_back();
            return true;
        }

        return false;
    }
    else {
        return false;
    }
}
template<typename compare> void scan_file<compare>::get_files(vector<string>& files)
{
    /**只掃描該目錄下的檔案,不掃描資料夾**/
    /**若想遞迴掃描,可將每次掃描到的檔案push_back進vector**/
    DIR* dirp;
    dirent ent;
    dirent* result;
    struct stat stat_buf;
    string full_name = "";
    files.resize(0);
    vector<string>::const_iterator dir_iter;
    for (dir_iter = dir_vector.begin(); dir_iter != dir_vector.end(); ++dir_iter) {
        assert((dirp = opendir(dir_iter->c_str())) != NULL);
        while (readdir_r(dirp, &ent, &result) == 0 && result != 0) {
            if (strcmp(ent.d_name, ".") == 0 || strcmp(ent.d_name, "..") == 0)  continue;
            full_name = *dir_iter + '/' + ent.d_name;
            if (regexec(&reg, ent.d_name, (size_t)0, 0, 0) != 0)  continue;
            assert(::lstat(full_name.c_str(), &stat_buf) >= 0);
            if (S_ISDIR(stat_buf.st_mode) == 0) {        //不是資料夾
                if (regexec(&reg, ent.d_name, (size_t)0, 0, 0) == 0) {
                    files.push_back(ent.d_name);
                }
            }
        }
        closedir(dirp);
    }
}
int main()
{
    string path = "/tmp/other";
    string pattern = ".*.cpp";
    scan_file<> *tmp = new scan_file<>(path, pattern, 1);
    /**********方式一:單個檔案獲取************/
    string file = "";
    while (tmp->get_file(file) == true) {
        cout<<file<<endl;
    }
    delete tmp;
    /**********方式二:vecotor獲取************/
    cout<<"------------------------------------"<<endl;
    tmp = new scan_file<>(path, pattern, 1);
    vector<string> files;
    tmp->get_files(files);
    vector<string>::iterator it;
    for ( it = files.begin(); it < files.end(); it++ ) {
        cout<<*it<<endl;
    }
    delete tmp;
    return 0;
}
(2)編譯執行:
g++ -o scanfile scanfile.cpp 
./scanfile

三、總結

(1)遞迴掃描的範例以絕對路徑作為輸入;若手上只有相對路徑,建議先自行轉換為絕對路徑(例如使用 realpath())再傳入。
(2)若有建議,請留言,在此先感謝!