【演算法】Trie樹（字首樹/字典樹）簡介及Leetcode上關於字首樹的題

阿新 • • 發佈：2019-02-12

前幾天同學面今日頭條被問到了Trie樹，剛好我也對於Trie樹這種資料結構不是很熟悉，所以研究了一下字首樹，然後把Leetcode上關於字首樹的題都給做了一遍。

Leetcode上關於字首樹的題有如下：

Trie簡介

Trie樹，又稱單詞查詢樹或鍵樹，是一種樹形結構，是一種雜湊樹的變種。
典型應用是
1. 用於統計和排序大量的字串（但不僅限於字串），所以經常被搜尋引擎系統用於文字詞頻統計。
2. 用於字首匹配，比如我們在搜尋引擎中輸入待搜尋的字詞時，搜尋引擎會給予提示有哪些字首。
它的優點是：最大限度地減少無謂的字串比較，查詢效率比雜湊表高。缺點就是空間開銷大。

字首樹
這裡寫圖片描述

有如下特點：
1. 根節點不包含字元，除根節點外每一個節點都只包含一個字元。
2. 從根節點到某一節點，路徑上經過的字元連線起來，為該節點對應的字串。
3. 每個節點的所有子節點包含的字元都不相同。
4. 如果字元的種數為n，則每個結點的出度為n，這也是空間換時間的體現，浪費了很多的空間。
5. 插入、查詢的時間複雜度為O(m)，m為字串長度（注意這裡的m與上一條中表示字元種數的n無關）。

// One node of the prefix tree. Only lowercase letters 'a'..'z' are supported,
// so 26 child slots are enough.
class TrieNode {
public:
    TrieNode *child[26];  // child[c - 'a'] is the subtree for letter c, or nullptr
    bool isWord;          // true when an inserted word ends exactly at this node

    TrieNode() : isWord(false) {
        for (auto &slot : child) slot = nullptr;
    }

    // A node owns its children, so destroying it releases the whole subtree.
    ~TrieNode() {
        for (auto &slot : child) delete slot;
    }

    // Copying would leave two nodes owning the same children (double delete).
    TrieNode(const TrieNode &) = delete;
    TrieNode &operator=(const TrieNode &) = delete;
};

// Prefix tree over lowercase words, supporting insert / exact search / prefix search.
class Trie {
private:
    TrieNode *root;

    // Maps a character to its child slot, or -1 when it is outside 'a'..'z'.
    static int slotOf(char c) {
        return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
    }

    // Follows `key` from the root. Returns the node reached, or nullptr when the
    // path does not exist or `key` contains an unsupported character.
    const TrieNode *locate(const string &key) const {
        const TrieNode *node = root;
        for (char c : key) {
            const int slot = slotOf(c);
            if (slot < 0 || node->child[slot] == nullptr)
                return nullptr;
            node = node->child[slot];
        }
        return node;
    }

public:
    /** Initialize your data structure here. */
    Trie() {
        root = new TrieNode();
    }

    // Releases every node (TrieNode's destructor recurses into the children).
    ~Trie() {
        delete root;
    }

    // The trie owns raw pointers, so a shallow copy would double-free.
    Trie(const Trie &) = delete;
    Trie &operator=(const Trie &) = delete;

    /**
     * Inserts a word into the trie.
     * A word containing any character outside 'a'..'z' is ignored entirely
     * (nothing is inserted), instead of indexing outside the child array.
     */
    void insert(const string &word) {
        // Validate first so a rejected word leaves no partial path behind.
        for (char c : word)
            if (slotOf(c) < 0)
                return;

        TrieNode *node = root;
        for (char c : word) {
            // Create the next node on demand.
            TrieNode *&next = node->child[slotOf(c)];
            if (next == nullptr)
                next = new TrieNode();
            node = next;
        }
        node->isWord = true;
    }

    /**
     * Returns true if exactly this word was inserted.
     * The empty string is found only if it was inserted itself.
     */
    bool search(const string &word) const {
        const TrieNode *node = locate(word);
        // The path must exist AND a word must end at its last node.
        return node != nullptr && node->isWord;
    }

    /**
     * Returns true if any inserted word starts with the given prefix.
     * Same walk as search(), except the last node need not end a word.
     * An empty prefix matches as soon as the trie holds at least one word.
     */
    bool startsWith(const string &prefix) const {
        const TrieNode *node = locate(prefix);
        if (node == nullptr)
            return false;
        // Non-root nodes are only created by insert(), so a word runs through them.
        if (node != root)
            return true;
        // Empty prefix: the root always exists, so check that the trie is non-empty.
        if (root->isWord)
            return true;
        for (const TrieNode *kid : root->child)
            if (kid != nullptr)
                return true;
        return false;
    }

};

Leetcode上關於Trie的題

211. Add and Search Word - Data structure design

211. Add and Search Word - Data structure design
這道題題意是建立一個數據結構，能夠有插入字串和查詢是否存在字串的操作，但是查詢操作需要支援模糊查詢，即要滿足如下的條件

addWord(“bad”)
addWord(“dad”)
addWord(“mad”)
search(“pad”) -> false
search(“bad”) -> true
search(“.ad”) -> true
search(“b..”) -> true

這道題的思路就是一個字首樹變形，只不過在查詢操作的時候，如果碰見了「.」則將其每個子節點都搜尋一遍，相當於一個DFS了

// One node of the prefix tree over lowercase letters 'a'..'z'.
class TrieNode {
public:
    TrieNode *child[26];  // child[c - 'a'] is the subtree for letter c, or nullptr
    bool isWord;          // true when an added word ends exactly at this node

    TrieNode() : isWord(false) {
        for (auto &slot : child) slot = nullptr;
    }

    // A node owns its children, so destroying it releases the whole subtree.
    ~TrieNode() {
        for (auto &slot : child) delete slot;
    }

    // Copying would leave two nodes owning the same children (double delete).
    TrieNode(const TrieNode &) = delete;
    TrieNode &operator=(const TrieNode &) = delete;
};

// Word store with wildcard lookup: '.' in a query matches any single letter.
class WordDictionary {
private:
    TrieNode *root;

    static bool isLower(char c) {
        return c >= 'a' && c <= 'z';
    }

    // Matches pattern[pos..] against the subtree rooted at `node`.
    // Works on an index so no substring is copied per recursion level.
    static bool match(const string &pattern, size_t pos, const TrieNode *node) {
        if (node == nullptr)
            return false;
        if (pos == pattern.size())
            return node->isWord;

        const char c = pattern[pos];
        if (c == '.') {
            // Wildcard: try every child; missing children fail the nullptr check above.
            for (const TrieNode *kid : node->child)
                if (match(pattern, pos + 1, kid))
                    return true;
            return false;
        }
        // Anything other than '.' or a lowercase letter can never match.
        if (!isLower(c))
            return false;
        return match(pattern, pos + 1, node->child[c - 'a']);
    }

public:
    /** Initialize your data structure here. */
    WordDictionary() {
        root = new TrieNode();
    }

    // Releases every node (TrieNode's destructor recurses into the children).
    ~WordDictionary() {
        delete root;
    }

    // The dictionary owns raw pointers, so a shallow copy would double-free.
    WordDictionary(const WordDictionary &) = delete;
    WordDictionary &operator=(const WordDictionary &) = delete;

    /**
     * Adds a word into the data structure.
     * A word containing any character outside 'a'..'z' (including '.') is
     * ignored entirely, instead of indexing outside the child array.
     */
    void addWord(const string &word) {
        // Validate first so a rejected word leaves no partial path behind.
        for (char c : word)
            if (!isLower(c))
                return;

        TrieNode *node = root;
        for (char c : word) {
            TrieNode *&next = node->child[c - 'a'];
            if (next == nullptr)
                next = new TrieNode();
            node = next;
        }
        node->isWord = true;
    }

    /**
     * Returns true if `word` (which may contain '.') matches a stored word
     * within the subtree rooted at `root`. Kept public for existing callers;
     * search() is the usual entry point.
     */
    bool dfs(const string &word, TrieNode *root) {
        return match(word, 0, root);
    }

    /** Returns if the word is in the data structure. A word could contain the dot character '.' to represent any one letter. */
    bool search(const string &word) {
        return match(word, 0, root);
    }
};

472. Concatenated Words

472. Concatenated Words
這道題就是給一組字串，然後找出其中所有可以用其他字串拼接成的字串

Input: [“cat”,”cats”,”catsdogcats”,”dog”,”dogcatsdog”,”hippopotamuses”,”rat”,”ratcatdogcat”]
Output: [“catsdogcats”,”dogcatsdog”,”ratcatdogcat”]
Explanation:
“catsdogcats” can be concatenated by “cats”, “dog” and “cats”;
“dogcatsdog” can be concatenated by “dog”, “cats” and “dog”;
“ratcatdogcat” can be concatenated by “rat”, “cat”, “dog” and “cat”.

這道題其實非常的來氣，因為這道題用C++寫的話Trie過不了，在最後一組資料中會報Memory超過限制，但是用Java寫的話，就不會有問題。【看到有同學說是因為Leetcode中用C++寫的話需要釋放記憶體，否則執行多組case會爆memory，但是我實測的結果發現加上手動釋放記憶體依然過不了】
看discuss裡面有個深度優化的Trie寫法能夠解決這個問題:C++ Solutions, Backtrack, DP, or Trie.問題裡的第二樓
不過通用的Trie解法如下

// One node of the prefix tree over lowercase letters 'a'..'z'.
class TrieNode {
public:
    TrieNode *child[26];  // child[c - 'a'] is the subtree for letter c, or nullptr
    bool isWord;          // true when an inserted word ends exactly at this node

    TrieNode() : isWord(false) {
        for (auto &slot : child) slot = nullptr;
    }

    // A node owns its children, so destroying it releases the whole subtree.
    ~TrieNode() {
        for (auto &slot : child) delete slot;
    }

    // Copying would leave two nodes owning the same children (double delete).
    TrieNode(const TrieNode &) = delete;
    TrieNode &operator=(const TrieNode &) = delete;
};

// Prefix tree whose search() reports where a string can be cut after a stored word.
class Trie {
private:
    TrieNode *root;

    // Maps a character to its child slot, or -1 when it is outside 'a'..'z'.
    static int slotOf(char c) {
        return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
    }

public:
    /** Initialize your data structure here. */
    Trie() {
        root = new TrieNode();
    }

    // Releases every node (TrieNode's destructor recurses into the children).
    ~Trie() {
        delete root;
    }

    // The trie owns raw pointers, so a shallow copy would double-free.
    Trie(const Trie &) = delete;
    Trie &operator=(const Trie &) = delete;

    /**
     * Inserts a word into the trie.
     * A word containing any character outside 'a'..'z' is ignored entirely.
     */
    void insert(const string &word) {
        // Validate first so a rejected word leaves no partial path behind.
        for (char c : word)
            if (slotOf(c) < 0)
                return;

        TrieNode *node = root;
        for (char c : word) {
            TrieNode *&next = node->child[slotOf(c)];
            if (next == nullptr)
                next = new TrieNode();
            node = next;
        }
        node->isWord = true;
    }

    /**
     * Returns every position i (in increasing order, i < word.size()) such that
     * the prefix word[0..i) is a stored word, i.e. every place where `word` can
     * be split after a dictionary word. The whole word itself is never reported.
     * Scanning stops at the first character with no matching child
     * (or outside 'a'..'z').
     */
    vector<int> search(const string &word) const {
        vector<int> cuts;
        const TrieNode *node = root;
        for (size_t i = 0; i < word.size(); ++i) {
            // `node` is the node reached after consuming word[0..i).
            if (node->isWord)
                cuts.push_back(static_cast<int>(i));
            const int slot = slotOf(word[i]);
            if (slot < 0 || node->child[slot] == nullptr)
                return cuts;
            node = node->child[slot];
        }
        return cuts;
    }

};

// LeetCode 472: collects every word that is a concatenation of shorter words
// from the same list, using the Trie to enumerate split points.
class Solution {
public:
    Trie trie;                        // all words processed so far
    unordered_map<string, int> mark;  // processed words plus strings already proven splittable

    // Orders words by length so shorter words are inserted before longer ones.
    static bool cmp(const string &a, const string &b) {
        return a.size() < b.size();
    }

    // Returns true when `str` can be assembled from known words.
    // k == 0 marks the outermost call, where `str` itself must not count as a
    // match; for k == 1 it is enough that `str` is already in `mark`.
    // Note: only successes are remembered in `mark`; failures are recomputed.
    bool judge(string& str, int k) {
        if (k == 1 && mark.count(str) > 0)
            return true;

        const vector<int> cuts = trie.search(str);
        // Walk the split points from the back (longest prefix first).
        for (auto cut = cuts.rbegin(); cut != cuts.rend(); ++cut) {
            string suffix = str.substr(*cut);
            if (judge(suffix, 1)) {
                mark[str] = 1;
                return true;
            }
        }
        return false;
    }

    // Sorts `words` by length in place, then tests each non-empty word against
    // the words processed before it and registers it afterwards.
    vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
        sort(words.begin(), words.end(), cmp);
        vector<string> res;
        for (string &word : words) {
            if (word.empty())
                continue;
            if (judge(word, 0))
                res.push_back(word);
            trie.insert(word);
            mark[word] = 1;
        }
        return res;
    }
};

這個過不去，我也是非常的無奈，最後只好用雜湊集合（unordered_set）暴力做，程式碼如下：

// Words already processed by findAllConcatenatedWordsInADict; judge() looks segments up here.
unordered_set<string> mark;
// Sort predicate: true when `a` is strictly shorter than `b`.
static bool cmp(const string &a,const string &b){
    const bool shorter = b.size() > a.size();
    return shorter;
}
// Brute-force check whether word[pos..] can be cut into entries of `mark`.
// `str` is the segment accumulated so far, i.e. the characters since the last cut.
// At the end of the word the pending segment must itself be in `mark`.
bool judge(string &word,int pos,string str) {
    if (pos == word.size())
        return mark.count(str) > 0;

    str.push_back(word[pos]);
    // Option 1: the segment is a known word, so cut here and start a new one.
    const bool segmentIsWord = mark.count(str) > 0;
    if (segmentIsWord && judge(word, pos + 1, ""))
        return true;
    // Option 2: keep extending the current segment.
    return judge(word, pos + 1, str);
}
// Returns every word that is a concatenation of shorter words from `words`.
// `words` is sorted by length in place; empty strings are skipped.
vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
    // `mark` is shared state: reset it so words left over from an earlier call
    // cannot make unrelated words look like concatenations.
    mark.clear();

    sort(words.begin(), words.end(), cmp);
    vector<string> res;
    for (string &word : words) {
        if (word.empty())
            continue;
        // Only strictly earlier (shorter or equal-length) words are in `mark`
        // here, so a word never matches itself as a single segment.
        if (judge(word, 0, ""))
            res.push_back(word);
        mark.insert(word);
    }
    return res;
}

212. Word Search II

212. Word Search II
這道題的減弱版是word search I 是給一個圖，然後看如果沿著某一個路徑的話，是否存在一個給定的字串，那就跑一個DFS加回溯就好
如果是一組字串，則需要做一個查詢優化了，就是建一棵Trie樹，每次從圖中某個格子開始DFS，在搜尋圖的同時也同步在這棵Trie上往下走；如果走到了某個單詞的結尾節點，則該單詞就是一個結果，然後把這個節點上存的單詞清掉，避免重複記錄。
具體在實現上，有幾個細節：
1. 每個葉子結點可以就存著這個字串是什麼
2. 其次這道題只用到了Trie的建樹操作即可，剩下的search操作是不需要的，所以只用一個TrieNode資料結構就可以了

// Words found on the board so far; dfs() appends to it and findWords() returns it.
vector<string> res;
// Trie node for Word Search II: 26 child slots (initially null) plus the
// complete word stored at the node where that word ends (empty otherwise).
struct TrieNode {
    vector<TrieNode*> child;
    string word;

    TrieNode() : child(26, nullptr) {}
};
// Builds a trie holding every entry of `words` and returns its heap-allocated root.
// The node where an entry ends stores the entry itself in `word`.
// Assumes entries consist of 'a'..'z' only (the index is letter - 'a').
TrieNode *buildTrie(vector<string> &words){
    TrieNode *root = new TrieNode();
    for (const string &entry : words) {
        TrieNode *cursor = root;
        for (char letter : entry) {
            // Create the child on first use, then descend into it.
            TrieNode *&slot = cursor->child[letter - 'a'];
            if (slot == nullptr)
                slot = new TrieNode();
            cursor = slot;
        }
        cursor->word = entry;
    }
    return root;
}
// Explores the board from cell (i, j) while walking the trie from `root`.
// Every word stored on a trie node reached along the way is appended to the
// global `res`.
void dfs(TrieNode* root,vector<vector<char>>& board,int i,int j){
    // The order of the exit checks below matters; do not reorder them.
    // 1) No trie node: nothing can match along this path.
    if(root == nullptr ) return;

    // 2) Record a completed word BEFORE the bounds check, so a word whose last
    //    letter sits on an edge cell is still reported even though (i, j) has
    //    already stepped off the board. Clearing it prevents duplicate results.
    if(root->word.size() >0){
        res.push_back(root->word);
        root->word = "";
    }
    int n = board.size();
    int m = board[0].size();
    // 3) Off the board.
    if(i<0 ||j <0||i>=n|| j>=m)
        return;
    // 4) Cell already used on the current path (marked with 0 below).
    if(board[i][j] == 0)    return;
    // tmp keeps the cell's letter index so the cell can be restored when backtracking.
    // Assumes board cells are 'a'..'z' (tmp is used as a child index) — TODO confirm.
    int tmp = board[i][j]-'a';
    board[i][j] = 0;
    dfs(root->child[tmp],board,i-1,j);
    dfs(root->child[tmp],board,i,j-1);
    dfs(root->child[tmp],board,i+1,j);
    dfs(root->child[tmp],board,i,j+1);
    // Backtrack: restore the original letter.
    board[i][j] = tmp+'a';
    return;
}
vector<string> findWords(vector<vector<char>>& board, vector<string>& words) {

    auto root = buildTrie(words);
    int n = board.size();
    int m = board[0].size();
    for(int i =0 ;i<n;i++)
        for(int j = 0;j<m;j++)
            dfs(root,board,i,j);
    return res;
}

421. Maximum XOR of Two Numbers in an Array

421. Maximum XOR of Two Numbers in an Array
這道題是給一個數組，讓找出其中任意兩個數異或之後的最大值，要求演算法複雜度為O(n)。
這道題之前在【演算法】按位Bit By Bit的方法裡面有介紹過按位依次搜尋的演算法，這裡用Trie的方法可以再做一遍。
思路就是先將陣列中所有數構建一棵Trie，然後再掃一遍陣列中的每個數，遇到能夠異或得到1的，則這一位是1，否則是0.

// Binary trie node: child[0] / child[1] are the branches for a 0 / 1 bit,
// both null until created.
struct TrieNode {
    vector<TrieNode*> child;

    TrieNode() : child(2, nullptr) {}
};
// Builds a binary trie containing the 32-bit pattern of every number in
// `nums`, most significant bit (bit 31) first, and returns its heap-allocated root.
TrieNode* build(vector<int> & nums){
    TrieNode* root = new TrieNode();
    for (int value : nums) {
        TrieNode* cursor = root;
        for (int bit = 31; bit >= 0; --bit) {
            // Create the branch for this bit on first use, then descend.
            TrieNode*& branch = cursor->child[(value >> bit) & 1];
            if (branch == nullptr)
                branch = new TrieNode();
            cursor = branch;
        }
    }
    return root;
}
// Walks the trie from bit 31 down to bit 0, at each level preferring the
// branch whose bit differs from `num`'s bit, and returns the XOR value built
// that way (a 1 bit for every level where the opposite branch existed).
// Relies on every level having at least one branch, as in a trie from build()
// that holds at least one number.
int f(TrieNode* root,int num){
    int best = 0;
    for (int bit = 31; bit >= 0; --bit) {
        const int wanted = ((num >> bit) & 1) ^ 1;  // the opposite of num's bit
        if (root->child[wanted] != nullptr) {
            best = (best << 1) | 1;
            root = root->child[wanted];
            continue;
        }
        // Opposite branch missing: this bit of the result is 0, follow the other branch.
        best = (best << 1);
        root = root->child[wanted ^ 1];
    }
    return best;
}
int findMaximumXOR(vector<int>& nums) {
    int res = 0;
    auto root = build(nums);
    for(auto num:nums)
        res = max(res,f(root,num));
    return res;
}

其他需要特別注意到的地方

以上的幾道題都用到了遞迴/DFS的寫法，一定要注意遞迴終止條件的先後順序，一定一定要注意，今天碰到了好多的坑點。

【演算法】Trie數（字首樹/字典樹）簡介及Leetcode上關於字首樹的題

Trie簡介

Leetcode上關於Trie的題

211. Add and Search Word - Data structure design

472. Concatenated Words

212. Word Search II

421. Maximum XOR of Two Numbers in an Array

其他需要特別注意到的地方

【演算法】Trie數（字首樹/字典樹）簡介及Leetcode上關於字首樹的題

【Java常用排序演算法】歸併排序（二路歸併排序）

【演算法】二進位制數的逆序輸出

2018.09.29【BZOJ1026】【洛谷P2657】【SCOI2009】windy數（數位DP）

【演算法】尋找1000000000（十億）內素數並統計個數

【linux】tar.gz（bz或bz2等）結尾的源代碼包

【BZOJ3992】序列統計（動態規劃，NTT）

【NOIP2012P】尋寶（於2018.2.12）

P2483 【模板】k短路（[SDOI2010]魔法豬學院）

【Bzoj4289】PA2012 Tax（Dijkstra+技巧建圖）

【CodeForces954G】Castle Defense（二分答案+差分）

【BZOJ1023】仙人掌圖（仙人掌，動態規劃）

【題解】 bzoj3956: Count （ST表+單調棧）

【CF1009F】 Dominant Indices （長鏈剖分）

【CodeForces - 706C】Hard problem（dp，字典序）

【模板】歸併排序（+求逆序對）

【NOIP2018】【Luogu5019】鋪設道路（貪心，差分）

Java定時任務Timer排程器【一】原始碼分析（圖文詳解版）

【POJ】1562Oil Deposits（dfs求聯通塊）

【POJ】1324Holedox Moving（貪吃蛇的bfs）

【演算法】Trie數（字首樹/字典樹）簡介及Leetcode上關於字首樹的題

Trie簡介

Leetcode上關於Trie的題

211. Add and Search Word - Data structure design

472. Concatenated Words

212. Word Search II

421. Maximum XOR of Two Numbers in an Array

其他需要特別注意到的地方

相關推薦