1. 程式人生 > >【Lucene】近實時搜尋

【Lucene】近實時搜尋

近實時搜尋(Near Real-Time Search):可以直接透過一個尚未關閉的 IndexWriter 取得 reader,快速搜尋到索引的最新變更內容,而不必先關閉該 writer,也不必先呼叫該 writer 的 commit() 提交;這是 Lucene 自 2.9 版本起提供的功能。

程式碼示例(本例參考《Lucene In Action》):

package com.tan.code;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demonstrates Lucene near-real-time (NRT) search: a reader is obtained
 * directly from an open {@link IndexWriter}, so documents that were added or
 * deleted but not yet committed are already visible to searches.
 *
 * <p>Written against the Lucene 4.3 API. In 3.x the NRT reader was obtained
 * with {@code indexWriter.getReader()} and refreshed with
 * {@code reader.reopen()}.
 */
public class NearRealTimeTest {

	/** Filesystem location of the index used by this demo. */
	public static final String INDEX_DIR_PATH = "E:\\indexDir";

	private Analyzer analyzer = null;

	private File indexFile = null;

	private Directory directory = null;

	private IndexReader indexReader = null;

	private IndexSearcher indexSearcher = null;
	private IndexWriter indexWriter = null;

	/**
	 * Runs the NRT demo: indexes documents id0..id9, prints the hit count seen
	 * through an NRT reader, then deletes id1, adds id10..id19 without
	 * committing, refreshes the reader and prints the hit count again.
	 *
	 * <p>The writer is opened with the default {@link IndexWriterConfig}, so
	 * documents already present under {@link #INDEX_DIR_PATH} are kept and
	 * counted as well.
	 *
	 * <p>The reader, writer and directory are always closed, even when an
	 * exception is thrown part-way through.
	 *
	 * @throws IOException if the index cannot be opened, written, searched or
	 *             closed
	 */
	public void nearRealTime() throws IOException {

		analyzer = new IKAnalyzer(true);
		indexFile = new File(INDEX_DIR_PATH);
		directory = new SimpleFSDirectory(indexFile);

		try {
			indexWriter = new IndexWriter(directory, new IndexWriterConfig(
					Version.LUCENE_43, analyzer));
			addDocuments(0, 10);

			// Open the reader from the writer (not from the directory) so that
			// uncommitted changes are visible; 'true' applies pending deletes.
			indexReader = DirectoryReader.open(indexWriter, true);
			printHitCount();

			// Change the index, but do not commit.
			Query deleteById = new TermQuery(new Term("id", "id1"));
			indexWriter.deleteDocuments(deleteById);
			addDocuments(10, 20);

			// Pick up the incremental changes. openIfChanged returns null when
			// nothing changed; in that case the current reader is still valid
			// and must be kept rather than closed.
			DirectoryReader refreshed = DirectoryReader.openIfChanged(
					(DirectoryReader) indexReader, indexWriter, true);
			if (refreshed != null) {
				indexReader.close();
				indexReader = refreshed;
			}
			printHitCount();
		} finally {
			closeResources();
		}
	}

	/**
	 * Adds one document per index in {@code [from, to)} to the open writer,
	 * each with the fields {@code id}, {@code name} and {@code content}.
	 */
	private void addDocuments(int from, int to) throws IOException {
		for (int i = from; i < to; i++) {
			Document document = new Document();
			document.add(new StringField("id", "id" + i, Store.YES));
			document.add(new TextField("name", "my name is tank" + i, Store.YES));
			document.add(new TextField("content", "content" + i, Store.YES));
			indexWriter.addDocument(document);
		}
	}

	/**
	 * Searches the current reader for the term {@code name} in field
	 * {@code name} (top 100 results) and prints the total hit count.
	 */
	private void printHitCount() throws IOException {
		indexSearcher = new IndexSearcher(indexReader);
		Query query = new TermQuery(new Term("name", "name"));
		TopDocs topDocs = indexSearcher.search(query, 100);
		System.out.println("命中數:" + topDocs.totalHits);
	}

	/**
	 * Closes reader, writer and directory in that order. Each close is
	 * attempted even if an earlier one throws, and the fields are cleared so a
	 * later run never touches stale handles.
	 */
	private void closeResources() throws IOException {
		try {
			if (indexReader != null) {
				indexReader.close();
			}
		} finally {
			try {
				if (indexWriter != null) {
					indexWriter.close();
				}
			} finally {
				try {
					if (directory != null) {
						directory.close();
					}
				} finally {
					indexSearcher = null;
					indexReader = null;
					indexWriter = null;
					directory = null;
				}
			}
		}
	}
}

測試程式碼:
package com.tan.test;

import static org.junit.Assert.*;

import java.io.IOException;

import org.junit.Test;

import com.tan.code.NearRealTimeTest;

/**
 * JUnit driver for the near-real-time search demo.
 */
public class MyTest {

	/**
	 * Runs {@link NearRealTimeTest#nearRealTime()} once; the test passes when
	 * the demo completes without throwing.
	 *
	 * @throws IOException propagated from the demo
	 */
	@Test
	public void test() throws IOException {
		new NearRealTimeTest().nearRealTime();
	}

}

測試結果(建議使用Luke檢視索引結果):