lucene專案實戰【一】
阿新 • • 發佈:2018-12-31
package cn.sp.lucene; import java.io.StringReader; import java.nio.file.Paths; import java.util.Date; import java.util.LinkedList; import java.util.List; import org.apache.commons.lang.StringEscapeUtils; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import cn.sp.entity.Blog; import cn.sp.util.DateUtil; import cn.sp.util.StringUtil; /** * 部落格Lucene索引類 * @author 2YSP * */ public class BlogIndex2 { private Directory dir; /** * 獲取indexWriter例項 * @return * @throws Exception */ public IndexWriter getWriter() throws Exception{ //詞典儲存目錄 dir = FSDirectory.open(Paths.get("D:\\lucene")); //分詞器 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(dir, config); return indexWriter; } /** * 新增部落格索引 * @param blog * @throws Exception */ public void addIndex(Blog blog) throws Exception{ //獲取writer物件 IndexWriter writer = getWriter(); //文件物件 Document doc = new Document(); //引數1.相當於key 2.值 3.是否儲存 //主鍵 doc.add(new StringField("id", String.valueOf(blog.getId()), Store.YES)); //部落格標題 doc.add(new TextField("title", blog.getTitle(), Store.YES));//儲存索引 //部落格內容,不包含html標籤的純文字 doc.add(new TextField("content", blog.getContentNoTag(), Store.YES)); //釋出日期 doc.add(new StringField("releaseDate", blog.getReleaseDateStr(), Store.YES)); //新增 writer.addDocument(doc); //關閉資源 writer.close(); } /** * 刪除索引 * @param id */ public void delIndex(String id)throws Exception{ //獲取writer物件 IndexWriter writer = getWriter(); //刪除 writer.deleteDocuments(new Term("id",id)); //強制刪除 writer.forceMergeDeletes(); //事物提交 writer.commit(); //關閉資源 writer.close(); } /** * 修改索引 和新增很類似 * @param blog */ public void updateIndex(Blog blog)throws Exception{ //獲取writer物件 IndexWriter writer = getWriter(); //獲取文件物件 Document doc = new Document(); doc.add(new StringField("id",String.valueOf(blog.getId()),Field.Store.YES));//加入索引 doc.add(new TextField("title", blog.getTitle(), Field.Store.YES)); doc.add(new StringField("releaseDate", DateUtil.formatDate(new Date(), "yyyy-MM-dd"), Field.Store.YES));//釋出日期 doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES)); //修改,根據id更新 writer.updateDocument(new Term("id", String.valueOf(blog.getId())), doc); //關閉資源 writer.close(); } /** * 查詢部落格 * @param keyWord * @return * @throws Exception */ public List<Blog> searchBlog(String keyWord) throws Exception{ dir = FSDirectory.open(Paths.get("D:\\lucene")); //獲取indexReader IndexReader reader = DirectoryReader.open(dir); //搜尋物件 IndexSearcher searcher = new IndexSearcher(reader); //多個條件 BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); //查詢解析器 QueryParser parser = new QueryParser("title",analyzer); //解析後獲得查詢物件 Query query = parser.parse(keyWord); QueryParser parser2 = new QueryParser("content",analyzer); Query query2 = parser2.parse(keyWord); //封裝 booleanQuery.add(query, Occur.SHOULD);//應該發生,非必須條件 booleanQuery.add(query2, Occur.SHOULD); //只查詢前100條 TopDocs hits = searcher.search(booleanQuery.build(), 100); //以標題為計分器 QueryScorer queryScorer = new QueryScorer(query); Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer); //字型加紅加粗 SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>"); Highlighter lighter = new Highlighter(htmlFormatter, queryScorer); lighter.setTextFragmenter(fragmenter); List<Blog> blogList = new LinkedList<Blog>(); for(ScoreDoc scoreDoc: hits.scoreDocs){ Document doc = searcher.doc(scoreDoc.doc); //封裝blog物件 Blog blog = new Blog(); blog.setId(Integer.parseInt(doc.get("id"))); blog.setReleaseDateStr(doc.get("releaseDate")); String title = doc.get("title"); String content = StringEscapeUtils.escapeHtml(doc.get("content"));//去html if(title != null){ TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title)); String hTitle = lighter.getBestFragment(tokenStream, title); if(StringUtil.isEmpty(hTitle)){//沒有高亮的 blog.setTitle(title);//不含關鍵字的 }else{ blog.setTitle(hTitle);//高亮的 } } if(content != null){ TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content)); String hContent = lighter.getBestFragment(tokenStream, content); if(StringUtil.isEmpty(hContent)){ if(content.length() > 200){ //內容限制200 blog.setContent(content.substring(0,200)); }else{ blog.setContent(content); } }else{ blog.setContent(hContent); } } //新增到集合 blogList.add(blog); } return blogList; } }