1. 程式人生 > Lucene中文分詞以及關鍵字的高亮

Lucene中文分詞以及關鍵字的高亮

    本文示範如何讓搜尋結果中的關鍵字在頁面上以高亮(紅色粗體)顯示,並使用適合國內使用情境的中文分詞器 SmartChineseAnalyzer。

    需要引用的 jar 依賴,請參考 Lucene 第一章的 pom.xml。

package com.zero.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene
.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache
.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter
; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * 搜尋的時候 關鍵字紅色 * @author samuel * */ public class SearcherColor { private IndexReader indexReader; private IndexSearcher indexSearcher; /** * 搜尋索引 * @throws Exception */ public void searcher() throws Exception { String uri = ""; Directory directory = FSDirectory.open(Paths.get(uri)); indexReader = DirectoryReader.open(directory); indexSearcher = new IndexSearcher(indexReader); // 中文分詞 Analyzer analyer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("title", analyer); Query query = parser.parse("pwd.txt"); TopDocs topDocs = indexSearcher.search(query, 10); // 高亮部分 QueryScorer queryScorer = new QueryScorer(query); // 設定關鍵的高亮度字型和顏色 Formatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>"); Highlighter highligther = new Highlighter(formatter, queryScorer); Fragmenter fragmentScorer = new SimpleSpanFragmenter(queryScorer); highligther.setTextFragmenter(fragmentScorer); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document doc = indexReader.document(scoreDoc.doc); String title = doc.get("title"); if (null != title) { // 拿到最高分數的內容片斷 TokenStream tokenStream = analyer.tokenStream("title", "hahahah"); String bestSpan = highligther.getBestFragment(tokenStream, title); System.out.println(bestSpan); } } } }