1. 程式人生 > trie樹查找和hash查找比較(大量數據)

trie樹查找和hash查找比較(大量數據)

stdlib.h emp lib ras eno strlen oid ack std

trie樹代碼

#include<iostream>
#include<stdio.h>
#include<iostream>
#include<string>
#include<stdlib.h>
#include<fstream>
#include<sstream>
#include<vector>
#include<string>
#include<time.h>
using namespace std;
/// Node of a trie keyed on the lowercase ASCII letters 'a'..'z'.
///
/// A node owns its `word` buffer and all of its children: destroying a node
/// frees the whole subtree below it. Nodes are therefore non-copyable.
class trienode
{
public:
    /// Number of child slots, one per letter 'a'..'z'.
    static const int kAlphabetSize = 26;

    char *word;                      ///< Owned NUL-terminated copy of the word ending at this node; null if none ends here.
    int count;                       ///< How many times `word` has been inserted (word frequency).
    trienode *branch[kAlphabetSize]; ///< branch[c - 'a'] is the child for letter c; null when absent.

public:
    /// Creates an empty node: no word, zero count, all child slots null.
    trienode() : word(nullptr), count(0), branch() {}

    /// Releases the stored word and, recursively, every child node.
    ~trienode()
    {
        delete[] word;
        for (int i = 0; i < kAlphabetSize; i++)
        {
            delete branch[i]; // deleting a null slot is a no-op
        }
    }

    trienode(const trienode &) = delete;
    trienode &operator=(const trienode &) = delete;
};

/// Trie that counts occurrences of words made only of the letters 'a'..'z'.
///
/// The trie owns `root` and, through it, every node; it is non-copyable
/// because a copy would free the same nodes twice.
class trie
{
public:
    trienode *root;

public:
    trie();
    ~trie();
    trie(const trie &) = delete;
    trie &operator=(const trie &) = delete;

    void Insert(const char *str);
    bool Search(const char *str, int &count); // exact-match lookup
    void printall(trienode *root);            // print words in sorted order
    void printpre(const char *str);           // prefix match

private:
    static int LetterIndex(char c);
    trienode *Walk(const char *str) const;
};

/// Creates a trie that contains only an empty root node.
trie::trie()
{
    root = new trienode();
}

/// Frees every node; the node destructor recurses through the children.
trie::~trie()
{
    delete root;
}

/// Maps 'a'..'z' to 0..25 and returns -1 for any other character.
int trie::LetterIndex(char c)
{
    const int index = c - 'a';
    return (index >= 0 && index < trienode::kAlphabetSize) ? index : -1;
}

/// Follows `str` letter by letter from the root.
///
/// @return the node reached after consuming all of `str`, or null when the
///         path does not exist or `str` contains a character outside 'a'..'z'.
trienode *trie::Walk(const char *str) const
{
    trienode *node = root;
    for (; node && *str; ++str)
    {
        const int index = LetterIndex(*str);
        if (index < 0)
        {
            return nullptr;
        }
        node = node->branch[index];
    }
    return node;
}

/// Records one occurrence of `str`.
///
/// A null pointer, or a word containing any character outside 'a'..'z', is
/// ignored entirely. The word is validated before any node is created, so a
/// rejected word leaves no partial path behind.
///
/// @param str NUL-terminated word; it is copied, not retained.
void trie::Insert(const char *str)
{
    if (!str)
    {
        return;
    }

    size_t length = 0;
    for (; str[length]; ++length)
    {
        if (LetterIndex(str[length]) < 0)
        {
            return;
        }
    }

    trienode *node = root;
    for (size_t i = 0; i < length; ++i)
    {
        trienode *&child = node->branch[LetterIndex(str[i])];
        if (!child)
        {
            child = new trienode();
        }
        node = child;
    }

    if (!node->word)
    {
        // First occurrence: keep a private copy (terminator included) so
        // printall() can print the word without rebuilding it from the path.
        node->word = new char[length + 1];
        std::char_traits<char>::copy(node->word, str, length + 1);
    }
    node->count++;
}

/// Looks up `str` as a complete word.
///
/// @param str   NUL-terminated word to look up; null is treated as absent.
/// @param count Receives the word's frequency when it is found; left
///              untouched otherwise.
/// @return true when `str` was previously inserted.
bool trie::Search(const char *str, int &count)
{
    if (!str)
    {
        return false;
    }
    const trienode *node = Walk(str);
    if (node && node->word)
    {
        count = node->count;
        return true;
    }
    return false;
}

/// Prints every word stored at or below `root`, one per line.
///
/// A node's own word is printed before its children and children are visited
/// from 'a' to 'z', so the output is in lexicographic order.
void trie::printall(trienode *root)
{
    if (!root)
    {
        return;
    }
    if (root->word)
    {
        std::cout << root->word << '\n';
    }
    for (int i = 0; i < trienode::kAlphabetSize; i++)
    {
        printall(root->branch[i]);
    }
}

/// Prints every stored word that starts with the prefix `str`.
/// Prints nothing when `str` is null, contains a character outside 'a'..'z',
/// or no stored word has that prefix.
void trie::printpre(const char *str)
{
    if (!str)
    {
        return;
    }
    printall(Walk(str));
}

/// Returns `token` with every '.', ',', '(' and ')' removed.
/// Builds the result in a single pass instead of erasing in place, which
/// skipped the character after each removal and was quadratic on large input.
static std::string StripPunctuation(const std::string &token)
{
    std::string cleaned;
    cleaned.reserve(token.size());
    for (char c : token)
    {
        // The original listing tested '(' twice; the second test is taken to
        // have been meant as ')'.
        if (c != '.' && c != ',' && c != '(' && c != ')')
        {
            cleaned += c;
        }
    }
    return cleaned;
}

/// Builds a trie from the words of the input file, then prints the frequency
/// of "the" (or "empty" when it never occurs) and the elapsed CPU time.
int main()
{
    const char *const kInputPath = "C:/Users/ww/Desktop/string.txt";
    const clock_t startTime = clock();

    std::ifstream input(kInputPath);
    if (!input)
    {
        std::cerr << "cannot open " << kInputPath << std::endl;
        return 1;
    }

    trie t;
    std::string token;
    while (input >> token)
    {
        const std::string word = StripPunctuation(token);
        if (!word.empty()) // a token made only of punctuation is not a word
        {
            t.Insert(word.c_str());
        }
    }

    int val = -1;
    if (t.Search("the", val))
    {
        std::cout << val << std::endl;
    }
    else
    {
        std::cout << "empty" << std::endl;
    }

    const clock_t endTime = clock();
    // Convert clock ticks to milliseconds so the figure does not depend on
    // the platform's CLOCKS_PER_SEC.
    std::cout << "the running time is "
              << 1000.0 * static_cast<double>(endTime - startTime) / CLOCKS_PER_SEC
              << std::endl;
    return 0;
}

hash代碼

#include<stdlib.h>
#include<time.h>
#include<cstring>
#include<fstream>
#include<iostream>
#include<sstream>
#include<string>
#include<vector>
using namespace std;
/// One link of a bucket chain.
///
/// Each bucket starts with a sentinel head node whose `p` stays null; the
/// nodes after it each own a heap-allocated copy of one inserted string.
class hashnode
{
public:
    char *p;        // owned NUL-terminated string; null in a sentinel head
    hashnode *next; // next node in the same bucket, or null at the end
};

/// Fixed-size, separately chained hash table of C strings.
///
/// The table owns every node and every string copy and releases them in its
/// destructor, so it is non-copyable. Duplicate insertions are stored as
/// separate nodes.
class hashmap
{
public:
    /// Number of buckets; String2Int() always returns a value below this.
    static const int kBucketCount = 1000;

    hashnode *hashps[kBucketCount]; // hashps[i] is the sentinel head of bucket i

public:
    hashmap();
    ~hashmap();
    hashmap(const hashmap &) = delete;
    hashmap &operator=(const hashmap &) = delete;

    int String2Int(const char *p) const;
    void Insert(const char *p);
    bool Find(const char *p) const;
};

/// Allocates one empty sentinel head per bucket.
hashmap::hashmap()
{
    for (int i = 0; i < kBucketCount; i++)
    {
        // `new hashnode()` value-initializes the node, so `p` and `next`
        // both start out null.
        hashps[i] = new hashnode();
    }
}

/// Frees every chain: sentinel heads, data nodes and their string copies.
hashmap::~hashmap()
{
    for (int i = 0; i < kBucketCount; i++)
    {
        hashnode *node = hashps[i];
        while (node)
        {
            hashnode *following = node->next;
            delete[] node->p; // null for the sentinel head, which is a no-op
            delete node;
            node = following;
        }
    }
}

/// Hashes `p` to a bucket index by summing its bytes modulo kBucketCount.
///
/// Bytes are summed as unsigned values in an unsigned accumulator, so
/// non-ASCII bytes and very long strings can never produce a negative or
/// overflowed index.
///
/// @param p NUL-terminated string; null hashes to bucket 0.
/// @return an index in [0, kBucketCount).
int hashmap::String2Int(const char *p) const
{
    unsigned int sum = 0;
    if (p)
    {
        for (; *p; ++p)
        {
            sum += static_cast<unsigned char>(*p);
        }
    }
    return static_cast<int>(sum % kBucketCount);
}

/// Stores a private copy of `p` at the front of its bucket chain.
///
/// No duplicate check is made. A null pointer is ignored.
///
/// @param p NUL-terminated string to store; it is copied, not retained.
void hashmap::Insert(const char *p)
{
    if (!p)
    {
        return;
    }

    const std::size_t bytes = std::strlen(p) + 1; // include the terminator
    char *copy = new char[bytes];
    std::memcpy(copy, p, bytes);

    hashnode *entry = new hashnode();
    entry->p = copy;

    hashnode *head = hashps[String2Int(p)];
    entry->next = head->next;
    head->next = entry;
}

/// Reports whether a string equal to `p` has been inserted.
///
/// @param p NUL-terminated string to look for; null is never found.
/// @return true when the bucket chain for `p` holds an equal string,
///         false otherwise (including when the chain holds other strings).
bool hashmap::Find(const char *p) const
{
    if (!p)
    {
        return false;
    }
    // Start after the sentinel head, whose `p` is null.
    for (const hashnode *node = hashps[String2Int(p)]->next; node; node = node->next)
    {
        if (std::strcmp(p, node->p) == 0)
        {
            return true;
        }
    }
    return false;
}
/// Returns the integer that `p` points to.
/// `p` is dereferenced unconditionally, so it must point to a readable int.
int re(int *p)
{
    const int value = p[0];
    return value;
}
int main()
{
    clock_t startTime, endTime;
    startTime = clock();
    hashmap *t = new hashmap();
    ifstream it("C:/Users/ww/Desktop/string.txt");
    string sline;
    string str = "";
    while (it&&getline(it, sline))
    {
        str += sline + " ";
    }
    it.close();
    for (int i = 0; i < str.length(); i++)
    {
        if (str[i] == . || str[i] == , || str[i] == ( || str[i] == ()
        {
            str.erase(i, 1);
        }
    }
    stringstream ss(str);
    string word;
    vector<string> vec;
    while (ss >> word)
    {
        vec.push_back(word);
    }
    vector<string>::iterator iter;
    for (iter = vec.begin(); iter != vec.end(); iter++)
    {
        t->Insert((char*)(*iter).data());
    }
    cout << "the result is: " << t->Find("the") << endl;
    endTime = clock();
    cout << "the running time is " << (double)(endTime - startTime) << endl;
    return 0;
}

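trie樹的查找時間是 O(L),其中 L 是字符串長度;hash 查找的時間是 O(L + LL),其中 LL 是關鍵字所在哈希桶的鏈表長度(計算哈希值需要 O(L),沿鏈表逐個比較需要 O(LL))。trie 的查找時間與數據量無關;hash 只要桶的數量隨數據量擴充、使鏈表保持較短,查找時間也近似與數據量無關,兩者查找都很高效。但要注意,上面的 hash 實現固定使用 1000 個桶,平均鏈表長度約為 n/1000,會隨數據量 n 增長。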

trie樹查找和hash查找比較(大量數據)