java Trie實現英文單詞查詢樹 搜尋自動提示
阿新 • • 發佈:2018-12-16
原理解釋文章:https://blog.csdn.net/beiyetengqing/article/details/7856113
程式碼應用:
wordTrie.txt(工具類):
package com.xq.algorithm; import java.util.ArrayList; import java.util.List; /** * * <p>Title:</p> * <p>Description: 單詞Trie樹 * </p> * @createDate:2013-10-17 * @author xq * @version 1.0 */ public class WordTrie { /**trie樹根*/ private TrieNode root = new TrieNode(); /**英文字串正則匹配*/ static String englishPattern="^[A-Za-z]+$"; /**中文正則匹配*/ static String chinesePattern="[\u4e00-\u9fa5]"; static int ARRAY_LENGTH=26; static String zeroString=""; /** * * @Title: addWord * @Description: add word * @param @param word * @return void * @throws */ public void addWord(String word) { if(word==null || "".equals(word.trim())){ throw new IllegalArgumentException("word can not be null!"); } if(!word.matches(englishPattern)){ throw new IllegalArgumentException("word must be english!"); } addWord(root, word); } /** * * @Title: addWord * @Description:add word to node * @param @param node * @param @param word * @return void * @throws */ private void addWord(TrieNode node, String word) { if (word.length() == 0) { // if all characters of the word has been // added node.count++; node.nodeState=1; } else { node.prefixCount++; char c = word.charAt(0); c = Character.toLowerCase(c); int index = c - 'a'; if(index>=0 && index<ARRAY_LENGTH){ if (node.next[index] == null) { node.next[index] = new TrieNode(); } // go the the next character addWord(node.next[index], word.substring(1)); } } } /** * * @Title: prefixSearchWord * @Description: 字首搜尋 * @param @param word * @param @return * @return List<String> * @throws */ public List<String> prefixSearchWord(String word){ if(word==null || "".equals(word.trim())){ return new ArrayList<String>(); } if(!word.matches(englishPattern)){ return new ArrayList<String>(); } char c = word.charAt(0); c = Character.toLowerCase(c); int index = c - 'a'; if(root.next!=null && root.next[index]!=null){ return depthSearch(root.next[index],new ArrayList<String>(),word.substring(1),""+c,word); }else{ return new ArrayList<String>(); } } /** * * @Title: searchWord * @Description: 搜尋單詞,以a-z為根,分別向下遞迴搜尋 * @param @param word * @param @return * @return List<String> * @throws */ public List<String> searchWord(String word){ if(word==null || "".equals(word.trim())){ return new ArrayList<String>(); } if(!word.matches(englishPattern)){ return new ArrayList<String>(); } char c = word.charAt(0); c = Character.toLowerCase(c); int index = c - 'a'; List<String> list=new ArrayList<String>(); if(root.next==null){ return list; } for(int i=0;i<ARRAY_LENGTH;i++){ int j='a'+i; char temp=(char)j; if(root.next[i]!=null){ if(index==i){ fullSearch(root.next[i],list,word.substring(1),""+temp,word); }else{ fullSearch(root.next[i],list,word,""+temp,word); } } } return list; } /** * * @Title: fullSearch * @Description: 匹配到對應的字母,則以該字母為字根,繼續匹配完所有的單詞。 * @param @param node * @param @param list 儲存搜尋到的字串 * @param @param word 搜尋的單詞.匹配到第一個則減去一個第一個,連續匹配,直到word為空串.若沒有連續匹配,則恢復到原串。 * @param @param matchedWord 匹配到的單詞 * @param @return * @return List<String> * @throws */ private List<String> fullSearch(TrieNode node,List<String> list,String word,String matchedWord,String inputWord){ if(node.nodeState==1 && word.length()==0){ list.add(matchedWord); } if(word.length() != 0){ char c = word.charAt(0); c = Character.toLowerCase(c); int index = c - 'a'; for(int i=0;i<ARRAY_LENGTH;i++){ if(node.next[i]!=null){ int j='a'+i; char temp=(char)j; if(index==i){ //連續匹配 fullSearch(node.next[i], list, word.substring(1), matchedWord+temp,inputWord); }else{ //未連續匹配,則重新匹配 fullSearch(node.next[i], list, inputWord, matchedWord+temp,inputWord); } } } }else{ if(node.prefixCount>0){ for(int i=0;i<ARRAY_LENGTH;i++){ if(node.next[i]!=null){ int j='a'+i; char temp=(char)j; fullSearch(node.next[i], list, zeroString, matchedWord+temp,inputWord); } } } } return list; } /** * * @Title: depthSearch * @Description: 深度遍歷子樹 * @param @param node * @param @param list 儲存搜尋到的字串 * @param @param word 搜尋的單詞.匹配到第一個則減去一個第一個,連續匹配,直到word為空串.若沒有連續匹配,則恢復到原串。 * @param @param matchedWord 匹配到的單詞 * @param @return * @return List<String> * @throws */ private List<String> depthSearch(TrieNode node,List<String> list,String word,String matchedWord,String inputWord){ if(node.nodeState==1 && word.length()==0){ list.add(matchedWord); } if(word.length() != 0){ char c = word.charAt(0); c = Character.toLowerCase(c); int index = c - 'a'; //繼續完全匹配,直到word為空串,否則未找到 if(node.next[index]!=null){ depthSearch(node.next[index], list, word.substring(1), matchedWord+c,inputWord); } }else{ if(node.prefixCount>0){//若匹配單詞結束,但是trie中的單詞並沒有完全找到,需繼續找到trie中的單詞結束. //node.prefixCount>0表示trie中的單詞還未結束 for(int i=0;i<ARRAY_LENGTH;i++){ if(node.next[i]!=null){ int j='a'+i; char temp=(char)j; depthSearch(node.next[i], list, zeroString, matchedWord+temp,inputWord); } } } } return list; } class TrieNode { /** * trie tree word count */ int count=0; /** * trie tree prefix count */ int prefixCount=0; /** * 指向各個子樹的指標,儲存26個字母[a-z] */ TrieNode[] next=new TrieNode[26]; /** * 當前TrieNode狀態 ,預設 0 , 1表示從根節點到當前節點的路徑表示一個詞 */ int nodeState = 0; TrieNode(){ count=0; prefixCount=0; next=new TrieNode[26]; nodeState = 0; } } }
測試類:
package com.xq.algorithm; import java.util.List; public class WordTrieMain { public static void main(String[] args){ test(); } public static void test1(){ WordTrie trie=new WordTrie(); trie.addWord("ibiyzbi"); System.out.println("----------------------------------------"); List<String> words=trie.searchWord("bi"); for(String s: words){ System.out.println(s); } } public static void test(){ WordTrie trie=new WordTrie(); trie.addWord("abi"); trie.addWord("ai"); trie.addWord("aqi"); trie.addWord("biiiyou"); trie.addWord("dqdi"); trie.addWord("ji"); trie.addWord("li"); trie.addWord("liqing"); trie.addWord("liqq"); trie.addWord("liqqq"); trie.addWord("qi"); trie.addWord("qibi"); trie.addWord("i"); trie.addWord("ibiyzbi"); List<String> list=trie.prefixSearchWord("li"); for(String s: list){ System.out.println(s); } System.out.println("----------------------------------------"); List<String> li=trie.searchWord("i"); for(String s: li){ System.out.println(s); } System.out.println("----------------------------------------"); List<String> words=trie.searchWord("bi"); for(String s: words){ System.out.println(s); } System.out.println("----------------------------------------"); List<String> lst=trie.searchWord("q"); for(String s: lst){ System.out.println(s); } } }