1. 程式人生 > lucene專案實戰【一】

lucene專案實戰【一】

package cn.sp.lucene;

import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import cn.sp.entity.Blog;
import cn.sp.util.DateUtil;
import cn.sp.util.StringUtil;

/**
 * Maintains and queries the Lucene full-text index for blog posts.
 *
 * <p>Each blog is stored as one document with the fields {@code id},
 * {@code title}, {@code content} and {@code releaseDate}. Text is analyzed
 * with {@link SmartChineseAnalyzer}.
 *
 * @author 2YSP
 */
public class BlogIndex2 {

	/** File-system location of the index. */
	private static final String INDEX_PATH = "D:\\lucene";

	/** Maximum number of hits returned by {@link #searchBlog(String)}. */
	private static final int MAX_HITS = 100;

	/** Maximum length of the content summary used when no fragment could be highlighted. */
	private static final int SUMMARY_LENGTH = 200;

	/** Directory opened by the most recent {@link #getWriter()} call. */
	private Directory dir;

	/**
	 * Opens a new {@link IndexWriter} on the index directory.
	 *
	 * <p>The caller owns the returned writer and must close it; an unclosed
	 * writer keeps holding the index write lock. The directory opened here is
	 * stored in {@code dir} and is not closed by this method.
	 *
	 * @return a writer configured with a {@link SmartChineseAnalyzer}
	 * @throws Exception if the directory or the writer cannot be opened
	 */
	public IndexWriter getWriter() throws Exception {
		dir = openDirectory();
		SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
		IndexWriterConfig config = new IndexWriterConfig(analyzer);
		return new IndexWriter(dir, config);
	}

	/**
	 * Adds a blog to the index.
	 *
	 * @param blog the blog to index; its release date string is stored as-is
	 * @throws Exception if the document cannot be built or written
	 */
	public void addIndex(Blog blog) throws Exception {
		// Build the document before opening the writer so that invalid blog
		// data fails fast without ever taking the index write lock.
		Document doc = buildDocument(blog, blog.getReleaseDateStr());
		try (Directory directory = openDirectory();
				SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
				IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
			writer.addDocument(doc);
		}
	}

	/**
	 * Removes the blog with the given id from the index.
	 *
	 * @param id value of the {@code id} field of the document to delete
	 * @throws Exception if the index cannot be opened or modified
	 */
	public void delIndex(String id) throws Exception {
		try (Directory directory = openDirectory();
				SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
				IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
			writer.deleteDocuments(new Term("id", id));
			// Merge away segments containing deletions so the document is physically removed.
			writer.forceMergeDeletes();
			writer.commit();
		}
	}

	/**
	 * Replaces the indexed document of a blog, matched by its id.
	 *
	 * <p>The release date stored in the index is set to the current date,
	 * not to the blog's own release date string.
	 *
	 * @param blog the blog whose index entry is replaced
	 * @throws Exception if the document cannot be built or written
	 */
	public void updateIndex(Blog blog) throws Exception {
		Document doc = buildDocument(blog, DateUtil.formatDate(new Date(), "yyyy-MM-dd"));
		try (Directory directory = openDirectory();
				SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
				IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
			// Deletes any document with the same id term, then adds the new one.
			writer.updateDocument(new Term("id", String.valueOf(blog.getId())), doc);
		}
	}

	/**
	 * Searches blog titles and contents for a keyword.
	 *
	 * <p>The keyword is parsed with the classic Lucene query syntax once per
	 * field; a hit in either field is sufficient. At most {@value #MAX_HITS}
	 * results are returned. Matched terms in title and content are wrapped in
	 * {@code <b><font color='red'>…</font></b>}.
	 *
	 * @param keyWord the query string; {@code null} or blank yields an empty list
	 * @return the matching blogs in the order returned by the searcher, never {@code null}
	 * @throws Exception if the index cannot be read or the keyword cannot be parsed
	 */
	public List<Blog> searchBlog(String keyWord) throws Exception {
		if (keyWord == null || keyWord.trim().isEmpty()) {
			// Nothing to search for; avoids an NPE / parse failure in the query parser.
			return new ArrayList<Blog>();
		}
		try (Directory directory = openDirectory();
				IndexReader reader = DirectoryReader.open(directory);
				SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
			IndexSearcher searcher = new IndexSearcher(reader);

			Query titleQuery = new QueryParser("title", analyzer).parse(keyWord);
			Query contentQuery = new QueryParser("content", analyzer).parse(keyWord);

			// SHOULD + SHOULD: a match in either field is enough.
			BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
			booleanQuery.add(titleQuery, Occur.SHOULD);
			booleanQuery.add(contentQuery, Occur.SHOULD);

			TopDocs hits = searcher.search(booleanQuery.build(), MAX_HITS);

			// The highlighter is scored with the title query and then applied to
			// both the title and the content text.
			QueryScorer queryScorer = new QueryScorer(titleQuery);
			Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
			SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
			Highlighter lighter = new Highlighter(htmlFormatter, queryScorer);
			lighter.setTextFragmenter(fragmenter);

			List<Blog> blogList = new ArrayList<Blog>(hits.scoreDocs.length);
			for (ScoreDoc scoreDoc : hits.scoreDocs) {
				blogList.add(toBlog(searcher.doc(scoreDoc.doc), lighter, analyzer));
			}
			return blogList;
		}
	}

	/** Opens the file-system directory that holds the index. */
	private Directory openDirectory() throws Exception {
		return FSDirectory.open(Paths.get(INDEX_PATH));
	}

	/**
	 * Builds the index document for a blog.
	 *
	 * @param blog        source of id, title and tag-free content
	 * @param releaseDate value stored in the {@code releaseDate} field
	 */
	private Document buildDocument(Blog blog, String releaseDate) {
		Document doc = new Document();
		// StringField: indexed as a single un-analyzed term (exact match on id / date).
		doc.add(new StringField("id", String.valueOf(blog.getId()), Store.YES));
		// TextField: analyzed for full-text search; stored so it can be shown and highlighted.
		doc.add(new TextField("title", blog.getTitle(), Store.YES));
		doc.add(new TextField("content", blog.getContentNoTag(), Store.YES));
		doc.add(new StringField("releaseDate", releaseDate, Store.YES));
		return doc;
	}

	/** Converts a stored index document into a {@link Blog} with highlighted title and content. */
	private Blog toBlog(Document doc, Highlighter lighter, SmartChineseAnalyzer analyzer) throws Exception {
		Blog blog = new Blog();
		blog.setId(Integer.parseInt(doc.get("id")));
		blog.setReleaseDateStr(doc.get("releaseDate"));

		String title = doc.get("title");
		if (title != null) {
			String hTitle = bestFragment(lighter, analyzer, "title", title);
			// Fall back to the plain title when nothing in it matched.
			blog.setTitle(StringUtil.isEmpty(hTitle) ? title : hTitle);
		}

		// Escapes HTML-special characters in the stored content (it does not strip tags).
		// NOTE(review): highlighting and truncation below operate on the escaped
		// text, so a cut at SUMMARY_LENGTH may split an entity; commons-lang 2.x
		// also appears to encode non-ASCII characters as numeric entities, which
		// could prevent term matches in the content — confirm.
		String content = StringEscapeUtils.escapeHtml(doc.get("content"));
		if (content != null) {
			String hContent = bestFragment(lighter, analyzer, "content", content);
			if (StringUtil.isEmpty(hContent)) {
				// No highlighted fragment: use the leading part of the content as summary.
				blog.setContent(content.length() > SUMMARY_LENGTH
						? content.substring(0, SUMMARY_LENGTH)
						: content);
			} else {
				blog.setContent(hContent);
			}
		}
		return blog;
	}

	/** Returns the best highlighted fragment of {@code text}, as produced by the highlighter. */
	private String bestFragment(Highlighter lighter, SmartChineseAnalyzer analyzer, String field, String text)
			throws Exception {
		TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
		return lighter.getBestFragment(tokenStream, text);
	}
}