1. 程式人生 > 基於Lucene 7.1.0 實現搜尋引擎

基於Lucene 7.1.0 實現搜尋引擎

引入lucene 7.1.0 所使用的jar包

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-backward-codecs</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-suggest</artifactId>
    <version>7.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>7.1.0</version>
</dependency>
建立索引
/**
 * Adds one document to the on-disk index.
 *
 * <p>Each logical value is written in several Lucene field flavours because a
 * point field alone is searchable but neither retrievable nor sortable:
 * <ul>
 *   <li>{@code id}: {@code IntPoint} (exact/range lookup) + {@code StoredField} (retrieval)</li>
 *   <li>{@code content}: analyzed {@code TextField}, also stored</li>
 *   <li>{@code time}: {@code LongPoint} (range filter) + {@code StoredField} (retrieval)
 *       + {@code NumericDocValuesField} (sorting)</li>
 * </ul>
 *
 * <p>I/O failures are printed and swallowed; the method never throws {@code IOException}.
 *
 * @param id      document ID
 * @param content text to analyze, index and store
 * @param time    timestamp value; must not be {@code null} because it is unboxed
 */
public void createIndex(int id, String content, Long time) {
    // Analyzer used to tokenize the text fields.
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // try-with-resources closes the writer and then the directory even when
    // indexing fails, so a failed call does not leave the writer (and the
    // index write lock it holds) open.
    try (FSDirectory directory = FSDirectory.open(file.toPath());
         IndexWriter writer = new IndexWriter(directory, config)) {
        Document document = new Document();
        // Indexed for numeric lookup only; point fields are not stored.
        document.add(new IntPoint("id", id));
        // Stored copy so the value can be read back from search hits.
        document.add(new StoredField("id", id));

        // Field name, field text, Store.YES = keep the original text in the index.
        document.add(new TextField("content", content, Field.Store.YES));

        document.add(new LongPoint("time", time));
        document.add(new StoredField("time", time));
        // Doc-values copy; this is what sorting by "time" reads.
        document.add(new NumericDocValuesField("time", time));

        writer.addDocument(document);
        // Make the new document visible to readers opened after this call.
        writer.commit();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
修改索引
/**
 * Replaces the {@code content} of the document whose {@code id} point value equals {@code id}.
 *
 * <p>The existing document is looked up through its {@code IntPoint} id, its stored
 * fields are carried over, the old content is swapped for the new one, and the
 * index-only fields (which are not part of a loaded document) are re-created before
 * the document is written back. The old document is deleted with a point query,
 * because a {@code Term} on {@code id} cannot match a field indexed as {@code IntPoint}.
 *
 * <p>If no document with the given id exists the method does nothing.
 * I/O failures are printed and swallowed.
 *
 * @param id      ID of the document to update
 * @param content new content text
 */
public void updateIndex(int id, String content) {
    try (FSDirectory directory = FSDirectory.open(file.toPath())) {
        Query byId = IntPoint.newExactQuery("id", id);
        Document document;

        // Read phase: fetch the stored fields of the current version.
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher search = new IndexSearcher(reader);
            TopDocs topDocs = search.search(byId, 1);
            if (topDocs.scoreDocs.length == 0) {
                // Unknown id: nothing to update.
                return;
            }
            document = search.doc(topDocs.scoreDocs[0].doc);
        }

        // Swap the content instead of appending a second "content" value.
        document.removeFields("content");
        document.add(new TextField("content", content, Field.Store.YES));

        // A loaded document only contains stored fields; re-create the
        // index-only counterparts so id lookup, time filtering and time
        // sorting keep working after the update.
        document.add(new IntPoint("id", id));
        IndexableField storedTime = document.getField("time");
        if (storedTime != null && storedTime.numericValue() != null) {
            long time = storedTime.numericValue().longValue();
            document.add(new LongPoint("time", time));
            document.add(new NumericDocValuesField("time", time));
        }

        // Write phase: delete-by-query plus add become visible together on commit.
        try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            writer.deleteDocuments(byId);
            writer.addDocument(document);
            writer.commit();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
刪除索引
/**
 * Deletes every document that contains the exact indexed term {@code value}
 * in {@code field}.
 *
 * <p>The match is against indexed terms, so for an analyzed text field the value
 * must be a single token as produced by the analyzer. Fields indexed only as
 * points (such as an {@code IntPoint} id) have no terms and cannot be matched here.
 *
 * <p>I/O failures are printed and swallowed.
 *
 * @param field name of the field to match
 * @param value exact term to match
 */
public void deleteIndex(String field, String value) {
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // try-with-resources closes the writer and the directory even if the
    // delete or the commit fails.
    try (FSDirectory directory = FSDirectory.open(file.toPath());
         IndexWriter writer = new IndexWriter(directory, config)) {
        // Delete by exact term match.
        writer.deleteDocuments(new Term(field, value));
        writer.commit();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
搜尋
/**
 * Runs a paged full-text search over the index.
 *
 * <p>The result object contains {@code "count"} (total number of matching documents)
 * and {@code "data"} (one JSON object per hit on the requested page). Each hit holds
 * the document's stored fields plus {@code "score"}. When a keyword is given, the
 * {@code content} value is replaced by its best highlighted fragment; other values
 * are truncated to 240 characters.
 *
 * <p>Any failure (I/O, query syntax, highlighting) is printed and the object built
 * so far is returned.
 *
 * @param string    keyword(s) in query-parser syntax; blank matches every document
 * @param startTime lower bound of the time filter (inclusive)
 * @param endTime   upper bound of the time filter (inclusive); the filter is applied
 *                  only when both bounds are non-null and non-zero
 * @param order     1 sorts by the {@code time} field; any other value sorts by relevance
 * @param reverse   only used when sorting by time: {@code true} for descending,
 *                  {@code false} for ascending
 * @param page      1-based page number
 * @param size      page size
 * @return JSONObject with {@code count} and {@code data}; for {@code page < 1} or
 *         {@code size < 1} an object with count 0 and an empty data array
 */
public JSONObject search(String string, Long startTime, Long endTime, int order, boolean reverse, int page, int size) {
    JSONObject object = new JSONObject();
    if (page < 1 || size < 1) {
        // Lucene rejects a non-positive hit count and there is no valid window
        // to return, so answer with an empty page instead of failing.
        object.put("count", 0L);
        object.put("data", new JSONArray());
        return object;
    }

    // Names of the fields whose values are highlighted.
    List<String> list = new ArrayList<>();
    try (Directory directory = FSDirectory.open(file.toPath());
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher search = new IndexSearcher(reader);
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        if (StringUtils.isBlank(string)) {
            // No keyword: match every document. (Fully qualified so the block
            // does not depend on an additional import.)
            builder.add(new org.apache.lucene.search.MatchAllDocsQuery(), BooleanClause.Occur.MUST);
        } else {
            builder.add(new QueryParser("content", analyzer).parse(string), BooleanClause.Occur.MUST);
            list.add("content");
        }

        // Null-safe: the bounds are boxed Longs and may be absent.
        if (startTime != null && endTime != null && startTime != 0 && endTime != 0) {
            builder.add(LongPoint.newRangeQuery("time", startTime, endTime), BooleanClause.Occur.MUST);
        }
        BooleanQuery query = builder.build();

        // Unknown order values fall back to relevance; a null Sort is not accepted
        // by IndexSearcher.
        Sort sort;
        if (order == 1) {
            sort = new Sort(new SortField("time", SortField.Type.LONG, reverse));
        } else {
            sort = Sort.RELEVANCE;
        }

        // Collect enough top hits to cover every page up to the requested one.
        TopDocs topDocs = search.search(query, page * size, sort);
        // HTML wrapper placed around highlighted keywords.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        // Total number of documents matching the query.
        long count = topDocs.totalHits;
        object.put("count", count);

        JSONArray array = new JSONArray();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        int start = (page - 1) * size;
        // Bound by what was actually collected, never past the array.
        int end = Math.min(page * size, scoreDocs.length);
        for (int i = start; i < end; i++) {
            JSONObject json = new JSONObject();
            Document doc = search.doc(scoreDocs[i].doc);
            for (IndexableField field : doc.getFields()) {
                String name = field.name();
                String value = field.stringValue();
                if (value == null) {
                    // Field without a textual representation: nothing to emit.
                    continue;
                }
                if (list.contains(name)) {
                    // Highlight the keyword inside the field text.
                    TokenStream stream = analyzer.tokenStream(name, new StringReader(value));
                    String highlight = highlighter.getBestFragment(stream, value);
                    json.put(name, highlight == null ? value : highlight);
                } else if (value.length() > 240) {
                    json.put(name, value.substring(0, 240));
                } else {
                    json.put(name, value);
                }
            }
            // NOTE(review): with a field sort the per-hit score may not be
            // computed by this search overload — confirm before relying on it.
            json.put("score", scoreDocs[i].score);
            array.add(json);
        }
        object.put("data", array);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return object;
}
相關文章推薦
/**
 * Recommends articles whose {@code content} is similar to the article with the given ID.
 *
 * <p>The source article is located through its {@code IntPoint} id, a MoreLikeThis
 * query is built from it, and up to 10 other documents are returned in the
 * order the search produced them. Each entry has {@code "id"} and {@code "content"}.
 *
 * <p>I/O failures are printed and swallowed; the array built so far is returned.
 *
 * @param id ID of the article to find related articles for
 * @return JSONArray of related articles; empty when the ID is unknown or nothing is similar
 */
public JSONArray moreLikeThis(int id) {
    JSONArray array = new JSONArray();
    // Maximum number of recommendations returned.
    final int limit = 10;
    try (Directory directory = FSDirectory.open(file.toPath());
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        // Resolve the application-level ID to Lucene's internal document number.
        TopDocs source = searcher.search(IntPoint.newExactQuery("id", id), 1);
        if (source.scoreDocs.length == 0) {
            return array;
        }
        int sourceDoc = source.scoreDocs[0].doc;

        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Field(s) the similarity is computed on.
        mlt.setFieldNames(new String[]{"content"});
        mlt.setMinTermFreq(1);
        mlt.setMinDocFreq(1);
        mlt.setAnalyzer(analyzer);

        Query query = mlt.like(sourceDoc);
        // Ask for one extra hit: the source document normally matches itself
        // and is filtered out below.
        TopDocs topDocs = searcher.search(query, limit + 1);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            if (scoreDoc.doc == sourceDoc) {
                continue;
            }
            if (array.size() >= limit) {
                break;
            }
            Document doc = reader.document(scoreDoc.doc);
            JSONObject json = new JSONObject();
            json.put("id", doc.get("id"));
            json.put("content", doc.get("content"));
            array.add(json);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return array;
}