1. 程式人生 > Lucene筆記24-Lucene的使用-自定義評分簡介

Lucene筆記24-Lucene的使用-自定義評分簡介

一、自定義評分流程

有時候,Lucene提供的預設評分規則可能不符合業務需求,這時我們需要自定義評分規則。自定義評分的流程如下:(1)建立一個類繼承CustomScoreQuery;(2)重寫其getCustomScoreProvider()方法;(3)建立一個類繼承CustomScoreProvider;(4)重寫其customScore()方法。

二、程式碼實現

package com.wsy;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.search.function.FieldScoreQuery;
import org.apache.lucene.search.function.ValueSourceQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;

/**
 * Demonstrates custom scoring with Lucene 3.5.
 *
 * <p>{@link #index(boolean)} builds an index over the files of a source directory and
 * stores a random integer in the numeric {@code score} field of each document.
 * {@link #searchByScoreQuery()} then runs a term query whose relevance score is combined
 * with that field through a {@link CustomScoreQuery} subclass.
 */
public class MyScoreQuery {
    /** Location of the on-disk index. */
    private static final String INDEX_PATH = "E:\\Lucene\\IndexLibrary";

    /** Directory whose files are indexed. */
    private static final String SOURCE_PATH = "E:\\Lucene\\SearchSource";

    /** Index directory shared by indexing and searching. */
    private static final Directory directory;

    static {
        try {
            directory = FSDirectory.open(new File(INDEX_PATH));
        } catch (IOException e) {
            // Without a directory nothing in this class can work, so fail fast instead of
            // continuing with a null field and hitting a NullPointerException later.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Indexes every regular file found directly inside the source directory.
     *
     * <p>Each document gets the fields {@code content}, {@code fileName}, {@code path},
     * {@code date}, {@code size} and a random {@code score} in the range 0..99.
     *
     * @param update when {@code true}, all existing documents are deleted before the
     *               files are added again
     */
    public void index(boolean update) {
        // List the source files before touching the index: listFiles() returns null when the
        // path is not a readable directory, and in that case the existing index must not be wiped.
        File[] files = new File(SOURCE_PATH).listFiles();
        if (files == null) {
            System.err.println("Source directory is missing or not readable: " + SOURCE_PATH);
            return;
        }

        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (update) {
                indexWriter.deleteAll();
            }
            Random random = new Random();
            for (File file : files) {
                // Sub-directories cannot be opened with FileReader; skipping them keeps one
                // stray folder from aborting the whole indexing run.
                if (!file.isFile()) {
                    continue;
                }
                int score = random.nextInt(100);
                Document document = new Document();
                document.add(new Field("content", new FileReader(file)));
                document.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
                document.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length())));
                document.add(new NumericField("score", Field.Store.YES, true).setIntValue(score));
                indexWriter.addDocument(document);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the {@code content} field for the term {@code java}, scoring hits with
     * {@link MyCustomScoreQuery}, and prints doc id, score, file name, stored score and
     * size of up to 100 hits.
     */
    public void searchByScoreQuery() {
        IndexReader reader = null;
        IndexSearcher indexSearcher = null;
        try {
            // Open the reader here rather than once at class-load time, so the search sees
            // the documents written by a preceding index() call.
            reader = IndexReader.open(directory);
            indexSearcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("content", "java"));
            // Value source that reads the integer "score" field of each document.
            FieldScoreQuery fieldScoreQuery = new FieldScoreQuery("score", FieldScoreQuery.Type.INT);
            // Combine the original query with the score field in the custom query.
            MyCustomScoreQuery myCustomScoreQuery = new MyCustomScoreQuery(query, fieldScoreQuery);
            TopDocs topDocs = indexSearcher.search(myCustomScoreQuery, 100);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(scoreDoc.doc + "-->" + scoreDoc.score + "-->" + document.get("fileName") + "-->" + document.get("score") + "-->" + document.get("size"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the searcher and the reader separately: both are released even when the
            // search itself failed or when closing the first one throws.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Custom score query that delegates the score computation to
     * {@link MyCustomScoreProvider}.
     */
    private static class MyCustomScoreQuery extends CustomScoreQuery {
        /**
         * @param subQuery      the original query
         * @param valSrcQueries the query supplying the custom score value
         */
        public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQueries) {
            super(subQuery, valSrcQueries);
        }

        /**
         * Returns the provider that computes the final score.
         *
         * <p>The inherited implementation multiplies the original score by the value of
         * the score field; this override substitutes {@link MyCustomScoreProvider}.
         *
         * @param reader the reader the provider will score documents of
         */
        @Override
        protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
            // Hand over the reader supplied by the caller, not some other reader, so that
            // the doc ids later passed to customScore() belong to the provider's reader.
            return new MyCustomScoreProvider(reader);
        }
    }

    /**
     * Score provider that divides the original score by the score-field value.
     */
    private static class MyCustomScoreProvider extends CustomScoreProvider {
        public MyCustomScoreProvider(IndexReader reader) {
            super(reader);
        }

        /**
         * Computes the custom score of one document.
         *
         * @param doc           id of the document being scored
         * @param subQueryScore score produced by the original query
         * @param valSrcScore   value produced by the custom score field
         * @return {@code subQueryScore / valSrcScore}; a {@code valSrcScore} of zero is
         *         treated as one so the result is never infinite or NaN
         */
        @Override
        public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
            // The indexed score is random in 0..99, so zero is a legitimate value here and
            // must not be used as a divisor.
            float divisor = valSrcScore == 0f ? 1f : valSrcScore;
            return subQueryScore / divisor;
        }
    }

    /** Rebuilds the index and then runs the custom-score search. */
    public static void main(String[] args) {
        MyScoreQuery myScoreQuery = new MyScoreQuery();
        myScoreQuery.index(true);
        myScoreQuery.searchByScoreQuery();
    }
}

預設的評分是通過subQueryScore * valSrcScore計算出來的。為了演示效果,我們將customScore()方法的返回值改為subQueryScore / valSrcScore,再次執行並檢視結果,可以發現得到的評分和預設評分已經不一樣了。這裡先大致瞭解一下如何通過重寫getCustomScoreProvider()方法和customScore()方法來實現自定義評分,下一節我們將學習根據域進行評分設定。