1. 程式人生 > >13、自定義Analyzer實現字長過濾

13、自定義Analyzer實現字長過濾

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import java.io.IOException;

/**
 * Created by kangz on 2016/12/16.
 *
 * Custom {@link Analyzer} that splits text on whitespace and then keeps only
 * the tokens whose length lies within a configured range.
 *
 * <p>Both bounds are passed straight to Lucene's {@link LengthFilter}, which
 * treats the minimum and the maximum as inclusive.
 */
public class MyAnalzerlen extends Analyzer {

    /** Upper length bound used when the caller supplies only a minimum. */
    private static final int DEFAULT_MAX_LEN = 6;

    /** Minimum token length to keep (inclusive). */
    private int len;

    /** Maximum token length to keep (inclusive). */
    private final int maxLen;

    /**
     * Creates an analyzer that keeps tokens of length {@code len} up to the
     * default maximum of 6 characters.
     *
     * @param len minimum token length to keep (inclusive)
     */
    public MyAnalzerlen(int len) {
        this(len, DEFAULT_MAX_LEN);
    }

    /**
     * Creates an analyzer that keeps tokens whose length is between
     * {@code len} and {@code maxLen}.
     *
     * @param len    minimum token length to keep (inclusive)
     * @param maxLen maximum token length to keep (inclusive)
     */
    public MyAnalzerlen(int len, int maxLen) {
        this.len = len;
        this.maxLen = maxLen;
    }

    /**
     * @return the minimum token length currently configured
     */
    public int getLen() {
        return len;
    }

    /**
     * Changes the minimum token length.
     *
     * <p>NOTE(review): the value is only read inside
     * {@link #createComponents(String)}; Lucene analyzers normally cache the
     * components they create, so a change made after the analyzer has already
     * been used may not take effect -- confirm against the reuse strategy in use.
     *
     * @param len minimum token length to keep (inclusive)
     */
    public void setLen(int len) {
        this.len = len;
    }

    /**
     * Builds the analysis chain: a whitespace tokenizer followed by a length
     * filter configured with the current bounds.
     *
     * @param s name of the field being analyzed (unused; every field gets the same chain)
     * @return the tokenizer together with the filtered token stream
     */
    @Override
    protected TokenStreamComponents createComponents(String s) {
        Tokenizer source = new WhitespaceTokenizer();
        TokenStream filtered = new LengthFilter(source, len, maxLen);
        return new TokenStreamComponents(source, filtered);
    }

    /**
     * Demo entry point: tokenizes a sample sentence and prints every token
     * that survives the length filter, one per line.
     *
     * @param args ignored
     * @throws Exception if tokenization fails; the failure is propagated rather than swallowed
     */
    public static void main(String[] args) throws Exception {
        String words = "I am a java coder ! jjjuje !";

        // Tokens shorter than 2 characters are dropped (the bound itself is kept).
        // try-with-resources closes the stream and the analyzer even when
        // reset()/incrementToken() throws.
        try (Analyzer analyzer = new MyAnalzerlen(2);
             TokenStream stream = analyzer.tokenStream("myfield", words)) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            stream.end();
        }
    }
}

TokenStream tokenStream = new LengthFilter(source, len, 6);// 分詞器 最小長度與最大長度皆為閉區間(包含邊界值) 重點的一句話

下面是小編的微信轉帳二維碼,小編再次謝謝讀者的支援,小編會更努力的

----請看下方↓↓↓↓↓↓↓

百度搜索 Drools從入門到精通:可下載開源全套Drools教程

深度Drools教程不斷更新中:


更多Drools實戰陸續釋出中………

掃描下方二維碼關注公眾號 ↓↓↓↓↓↓↓↓↓↓