基於Lucene 7.1.0 實現搜尋引擎
阿新 • • 發佈:2018-12-27
引入lucene 7.1.0 所使用的jar包
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-backward-codecs</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-suggest</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>7.1.0</version>
</dependency>
建立索引
/** * 建立索引 * * @param id ID * @param content 內容 * @param time 時間 */ public void createIndex(int id, String content, Long time) { try { // 索引存放磁碟位置 FSDirectory directory = FSDirectory.open(file.toPath()); // 使用的分詞器 IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(directory, config); // 建立檔案索引 Document document = new Document(); // int型別 存放形式 document.add(new IntPoint("id", id)); // 儲存儲存資訊,不寫不儲存儲存資訊 document.add(new StoredField("id", id)); // 欄位名字,欄位內容,Store:如果是yes 則說明儲存到文件中 document.add(new TextField("content", content, Field.Store.YES)); document.add(new LongPoint("time", time)); document.add(new StoredField("time", time)); // 控制排序欄位 document.add(new NumericDocValuesField("time", time)); writer.addDocument(document); // 必須存在,不然不生效 writer.commit(); writer.close(); directory.close(); } catch (IOException e) { e.printStackTrace(); } }
修改索引
/** * 修改索引 * * @param id ID * @param content 內容 */ public void updateIndex(int id, String content) { try { FSDirectory directory = FSDirectory.open(file.toPath()); IndexReader reader = DirectoryReader.open(directory); IndexSearcher search = new IndexSearcher(reader); // 通過id查詢對應的資料 TopDocs topDocs = search.search(IntPoint.newExactQuery("id", id), 1); ScoreDoc scoreDoc = topDocs.scoreDocs[0]; Document document = search.doc(scoreDoc.doc); document.add(new TextField("content", content, Field.Store.YES)); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(directory, config); // 通過id進行匹配 writer.updateDocument(new Term("id", String.valueOf(id)), document); writer.commit(); writer.close(); directory.close(); } catch (IOException e) { e.printStackTrace(); } }
刪除索引
/**
 * Deletes every document whose {@code field} contains the exact term {@code value}.
 *
 * <p>NOTE(review): a {@code Term} only matches fields that were indexed as terms
 * (for example a string or text field). A field indexed solely as a point, such
 * as an {@code IntPoint}, is not matched and needs a point query instead —
 * confirm how the target field is indexed before relying on this method.
 *
 * <p>An {@link IOException} is printed to stderr and otherwise swallowed.
 *
 * @param field name of the field to match
 * @param value term the field must contain
 */
public void deleteIndex(String field, String value) {
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // try-with-resources closes the writer first, then the directory, even
    // when the delete or the commit fails.
    try (FSDirectory directory = FSDirectory.open(file.toPath());
         IndexWriter writer = new IndexWriter(directory, config)) {
        // Delete by exact term match on the given field.
        writer.deleteDocuments(new Term(field, value));
        writer.commit();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
搜尋
/**
 * Runs a paged search and returns the hits as JSON.
 *
 * <p>The result object contains {@code count} (total number of matching
 * documents) and {@code data} (one JSON object per hit on the requested page,
 * holding every stored field plus {@code score}). When a keyword is given, the
 * {@code content} value is replaced by its best highlighted fragment; all other
 * field values are cut to at most 240 characters.
 *
 * <p>Any exception is printed to stderr and the object built so far is returned.
 *
 * @param string    keyword query, parsed against the {@code content} field; when
 *                  blank, a wildcard query on the {@code title} field is used instead
 * @param startTime lower bound of the time range (inclusive)
 * @param endTime   upper bound of the time range (inclusive); the range filter is
 *                  applied only when both bounds are non-null and non-zero
 * @param order     0 sorts by relevance, 1 sorts by time; any other value falls
 *                  back to relevance
 * @param reverse   passed to the time sort as its reverse flag ({@code true}
 *                  reverses the natural order); only used when {@code order} is 1
 * @param page      1-based page number; values below 1 are treated as 1
 * @param size      hits per page; values below 1 are treated as 1
 * @return JSONObject with {@code count} and {@code data}
 */
public JSONObject search(String string, Long startTime, Long endTime, int order, boolean reverse, int page, int size) {
    // Names of the fields whose values get highlight markup.
    List<String> list = new ArrayList<>();
    JSONObject object = new JSONObject();
    // Clamp paging input so the window arithmetic below stays valid.
    int pageNo = Math.max(page, 1);
    int pageSize = Math.max(size, 1);
    try (Directory directory = FSDirectory.open(file.toPath());
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        if (StringUtils.isBlank(string)) {
            // NOTE(review): this only matches documents that have an indexed
            // "title" field — confirm such a field is written by the indexer.
            builder.add(new WildcardQuery(new Term("title", "*")), BooleanClause.Occur.MUST);
        } else {
            builder.add(new QueryParser("content", analyzer).parse(string), BooleanClause.Occur.MUST);
            list.add("content");
        }
        if (startTime != null && endTime != null && startTime != 0L && endTime != 0L) {
            builder.add(LongPoint.newRangeQuery("time", startTime, endTime), BooleanClause.Occur.MUST);
        }
        BooleanQuery query = builder.build();

        // Never hand a null Sort to the searcher: unknown orders use relevance.
        Sort sort;
        if (order == 1) {
            sort = new Sort(new SortField("time", SortField.Type.LONG, reverse));
        } else {
            sort = Sort.RELEVANCE;
        }

        // Collect everything up to the end of the requested page. Scores are
        // requested explicitly because they are emitted for every hit below.
        int limit = Math.multiplyExact(pageNo, pageSize);
        TopDocs topDocs = searcher.search(query, limit, sort, true, false);

        // HTML wrapper for highlighted keywords.
        // NOTE(review): the field text itself is not HTML-escaped here — confirm
        // the caller escapes it if the indexed content is untrusted.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        // Total number of documents matching the query.
        long count = topDocs.totalHits;
        object.put("count", count);

        JSONArray array = new JSONArray();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        // scoreDocs never holds more than `limit` entries, so its length is the
        // end of the page window; the loop is empty when the page is past the end.
        int start = (pageNo - 1) * pageSize;
        for (int i = start; i < scoreDocs.length; i++) {
            JSONObject json = new JSONObject();
            Document doc = searcher.doc(scoreDocs[i].doc);
            for (IndexableField field : doc.getFields()) {
                String name = field.name();
                String value = field.stringValue();
                if (value == null) {
                    // No string form for this field; nothing to emit.
                    continue;
                }
                if (list.contains(name)) {
                    // Highlight the keyword; fall back to the raw text when no
                    // fragment matched.
                    TokenStream stream = analyzer.tokenStream(name, new StringReader(value));
                    String highlight = highlighter.getBestFragment(stream, value);
                    json.put(name, highlight == null ? value : highlight);
                } else {
                    json.put(name, value.length() > 240 ? value.substring(0, 240) : value);
                }
            }
            json.put("score", scoreDocs[i].score);
            array.add(json);
        }
        object.put("data", array);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return object;
}
相關文章推薦
/**
 * Recommends articles related to the article with the given id.
 *
 * <p>The article is looked up with a point query on {@code id}; a
 * more-like-this query is then built from its {@code content} field and run
 * once. Up to 10 hits other than the article itself are returned, each as a
 * JSON object with {@code id} and {@code content}.
 *
 * <p>Returns an empty array when no article has the given id. An
 * {@link IOException} is printed to stderr and the array built so far is returned.
 *
 * @param id ID of the article to find related articles for
 * @return JSONArray of related articles
 */
public JSONArray moreLikeThis(int id) {
    final int maxResults = 10;
    JSONArray array = new JSONArray();
    try (Directory directory = FSDirectory.open(file.toPath());
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        // Resolve the article id to its internal document number.
        TopDocs source = searcher.search(IntPoint.newExactQuery("id", id), 1);
        if (source.scoreDocs.length == 0) {
            return array;
        }
        int sourceDoc = source.scoreDocs[0].doc;

        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Field used to measure similarity.
        mlt.setFieldNames(new String[]{"content"});
        mlt.setMinTermFreq(1);
        mlt.setMinDocFreq(1);
        mlt.setAnalyzer(analyzer);
        Query query = mlt.like(sourceDoc);

        // Ask for one extra hit because the article can match itself.
        TopDocs topDocs = searcher.search(query, maxResults + 1);
        int added = 0;
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            if (scoreDoc.doc == sourceDoc) {
                continue;
            }
            if (added == maxResults) {
                break;
            }
            Document doc = reader.document(scoreDoc.doc);
            JSONObject json = new JSONObject();
            json.put("id", doc.get("id"));
            json.put("content", doc.get("content"));
            array.add(json);
            added++;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return array;
}