1. 程式人生 > >Lucene筆記25-Lucene的使用-根據域進行評分設定

Lucene筆記25-Lucene的使用-根據域進行評分設定

一、需求

根據檔名來設定評分規則,或者根據文件的修改時間,將最近一年的評分加倍,一年以外的評分降低,等等。

二、具體實現

這裡根據檔名來修改評分規則:檔名中包含“JRE”或“SYSTEM”的文件,評分乘以10,其餘文件的評分除以10。重點就是怎麼獲取到檔名:在customScore()方法中,有一個doc引數(文件編號),我們可以通過這個doc來獲取文件,從而獲取檔名。另外還有一點就是Lucene有域快取,只要IndexReader沒有關閉,已載入的域資料就會保留在域快取中,我們可以利用這個特性,將檔名這個域的內容取出來。

package com.wsy;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;

/**
 * Demonstrates a custom Lucene score that depends on a stored field value.
 *
 * <p>Documents whose {@code fileName} contains {@code "JRE"} or {@code "SYSTEM"} get their
 * sub-query score multiplied by 10; every other document gets its score divided by 10.
 * The file names are read through Lucene's {@link FieldCache} instead of loading each
 * stored document while scoring.
 */
public class MyScoreQuery {
    private static Directory directory;
    private static IndexReader indexReader;

    static {
        try {
            // Only the directory is opened eagerly. The reader is opened lazily (see
            // currentReader()) so that it reflects whatever index() has written and so
            // that a not-yet-existing index does not break class initialization.
            directory = FSDirectory.open(new File("E:\\Lucene\\IndexLibrary"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a reader that reflects the latest committed state of the index.
     *
     * <p>The reader is opened on first use and refreshed with
     * {@code IndexReader.openIfChanged} afterwards; a superseded reader is closed.
     *
     * @return the shared, up-to-date reader
     * @throws IOException if the index directory is unavailable or the index cannot be opened
     */
    private static IndexReader currentReader() throws IOException {
        if (directory == null) {
            throw new IOException("Index directory could not be opened");
        }
        if (indexReader == null) {
            indexReader = IndexReader.open(directory);
        } else {
            // openIfChanged returns null when the index has not changed since the reader was opened.
            IndexReader refreshed = IndexReader.openIfChanged(indexReader);
            if (refreshed != null) {
                indexReader.close();
                indexReader = refreshed;
            }
        }
        return indexReader;
    }

    /**
     * Indexes every regular file found in {@code E:\Lucene\SearchSource}.
     *
     * <p>Each document gets the fields {@code content}, {@code fileName}, {@code path},
     * {@code date}, {@code size} and a random {@code score} in {@code [0, 100)}.
     *
     * @param update when {@code true}, all existing documents are deleted before indexing
     */
    public void index(boolean update) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (update) {
                indexWriter.deleteAll();
            }
            File[] files = new File("E:\\Lucene\\SearchSource").listFiles();
            if (files == null) {
                // listFiles() returns null when the path is missing or is not a directory.
                return;
            }
            Random random = new Random();
            for (File file : files) {
                if (!file.isFile()) {
                    // A sub-directory cannot be opened with FileReader; skip it instead of
                    // aborting the whole indexing run.
                    continue;
                }
                int score = random.nextInt(100);
                FileReader contentReader = new FileReader(file);
                try {
                    Document document = new Document();
                    document.add(new Field("content", contentReader));
                    document.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
                    document.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length())));
                    document.add(new NumericField("score", Field.Store.YES, true).setIntValue(score));
                    indexWriter.addDocument(document);
                } finally {
                    // Make sure the file handle is released even if adding the document fails.
                    contentReader.close();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches {@code content:java}, re-scores the hits by file name and prints up to 100
     * results as {@code docId-->score-->fileName-->score field-->size}.
     */
    public void searchByFileScoreQuery() {
        IndexSearcher indexSearcher = null;
        try {
            indexSearcher = new IndexSearcher(currentReader());
            Query query = new TermQuery(new Term("content", "java"));
            // Wrap the original query in the custom-scoring query.
            FileNameScoreQuery fileNameScoreQuery = new FileNameScoreQuery(query);
            TopDocs topDocs = indexSearcher.search(fileNameScoreQuery, 100);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(scoreDoc.doc + "-->" + scoreDoc.score + "-->" + document.get("fileName") + "-->" + document.get("score") + "-->" + document.get("size"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Custom-score query that delegates scoring to {@link FileNameScoreProvider}. */
    private static class FileNameScoreQuery extends CustomScoreQuery {
        public FileNameScoreQuery(Query subQuery) {
            super(subQuery);
        }

        @Override
        protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
            // Use the reader handed in by Lucene: the doc ids later passed to customScore()
            // are relative to this reader, not to the top-level index reader.
            return new FileNameScoreProvider(reader);
        }
    }

    /** Score provider that boosts or penalizes a hit based on its {@code fileName} value. */
    private static class FileNameScoreProvider extends CustomScoreProvider {
        /** {@code fileName} values of the supplied reader, indexed by that reader's doc id. */
        private final String[] fileNames;

        /**
         * @param reader the reader whose documents will be scored
         * @throws IOException if the {@code fileName} values cannot be loaded from the field cache
         */
        public FileNameScoreProvider(IndexReader reader) throws IOException {
            super(reader);
            // Load the fileName values for exactly the reader being scored. Field-cache
            // lookups are fast but keep the values in memory while the reader stays open.
            fileNames = FieldCache.DEFAULT.getStrings(reader, "fileName");
        }

        /**
         * @param doc doc id relative to the reader given to the constructor
         * @param subQueryScore score produced by the wrapped query
         * @param valSrcScore value-source score (unused here)
         * @return {@code 10 * subQueryScore} when the file name contains "JRE" or "SYSTEM",
         *         otherwise {@code subQueryScore / 10}
         */
        @Override
        public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
            String fileName = fileNames[doc];
            // A document without a fileName value is treated as non-matching.
            if (fileName != null && (fileName.contains("JRE") || fileName.contains("SYSTEM"))) {
                return 10 * subQueryScore;
            }
            return subQueryScore / 10;
        }
    }

    /** Rebuilds the index and then runs the file-name-scored search. */
    public static void main(String[] args) {
        MyScoreQuery myScoreQuery = new MyScoreQuery();
        myScoreQuery.index(true);
        myScoreQuery.searchByFileScoreQuery();
    }
}