程式人生 > Lucene4.7.2 搜尋與高亮顯示

Lucene4.7.2 搜尋與高亮顯示

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.et</groupId>
  <artifactId>LuceneScoreSearch</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <!-- Spring Boot parent: supplies dependency management and plugin defaults -->
  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.9.RELEASE</version>
  </parent>
  <dependencies>
    <!-- Spring MVC, used for the @RestController search endpoints -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- IK Analyzer (org.wltea.analyzer.lucene.IKAnalyzer) -->
    <dependency>
      <groupId>com.janeluo</groupId>
      <artifactId>ikanalyzer</artifactId>
      <version>2012_u6</version>
    </dependency>
    <!-- Lucene highlighter module (org.apache.lucene.search.highlight.*) -->
    <dependency>
      <!-- groupId must not carry surrounding whitespace -->
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>4.7.2</version>
    </dependency>
    <!-- JUnit: unit-testing framework for Java -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- Compile as Java 1.7 with UTF-8 sources (the sources contain Chinese string literals) -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.et</groupId>
  <artifactId>LuceneScoreSearch</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <!-- Spring Boot parent: supplies dependency management and plugin defaults -->
  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.9.RELEASE</version>
  </parent>
  <dependencies>
    <!-- Spring MVC, used for the @RestController search endpoints -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- IK Analyzer (org.wltea.analyzer.lucene.IKAnalyzer) -->
    <dependency>
      <groupId>com.janeluo</groupId>
      <artifactId>ikanalyzer</artifactId>
      <version>2012_u6</version>
    </dependency>
    <!-- Lucene highlighter module (org.apache.lucene.search.highlight.*) -->
    <dependency>
      <!-- groupId must not carry surrounding whitespace -->
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>4.7.2</version>
    </dependency>
    <!-- JUnit: unit-testing framework for Java -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- Compile as Java 1.7 with UTF-8 sources (the sources contain Chinese string literals) -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
package cn.et;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Demo controller that builds a small Lucene index of fictional people and
 * exposes two search endpoints over the BRIEF field: one that reports the
 * relevance score of each hit and one that returns highlighted snippets.
 *
 * <p>Every method opens the on-disk index itself and releases it before
 * returning, so no Lucene file handles outlive a call.
 */
@RestController
public class LueneTesting {
	/** Directory of the on-disk index shared by the writer and both search endpoints. */
	private static final String INDEX_PATH = "H:/Lucene/index";

	// IK analyzer instance; the same one is used for indexing and for query
	// parsing so that both sides tokenize text identically.
	static Analyzer analyzer = new IKAnalyzer();

	/**
	 * Adds the five sample documents to the index and commits them.
	 *
	 * <p>If anything fails before the commit completes, the writer is rolled
	 * back so that no partial batch is left behind; either way the writer and
	 * the directory are closed.
	 *
	 * <p>NOTE(review): the writer config does not set an open mode, so repeated
	 * calls presumably append duplicate documents - confirm against the
	 * IndexWriterConfig defaults.
	 *
	 * @throws Exception if the index cannot be opened or written
	 */
	public static void write() throws Exception {
		try (Directory directory = FSDirectory.open(new File(INDEX_PATH))) {
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
			IndexWriter iwriter = new IndexWriter(directory, config);
			boolean committed = false;
			try {
				// Each document plays the role of one database record.
				// The descriptions are fictional; no offence is intended.
				iwriter.addDocument(person("20", "路橙", "來自中國湖南永州,是一名初級Java開發工程師,中國網際網路技術部落格:http://blog.csdn.net/phone13144830339"));
				iwriter.addDocument(person("21", "謝飛", "來自中國湖北武漢,是一名語文老師,中國教育網成員,2010年評選為中國10大優秀教師"));
				iwriter.addDocument(person("22", "鄧娟", "來自中國四川綿陽,是一名幼兒園老師"));
				iwriter.addDocument(person("23", "曹焰斌", "來自中國廣東廣州,是一名建築工人"));
				iwriter.addDocument(person("24", "SMISI", "來自美國底特律,是一名外資企業經理"));
				iwriter.commit();
				committed = true;
			} finally {
				if (committed) {
					iwriter.close();
				} else {
					// rollback() discards the uncommitted documents and closes the writer
					iwriter.rollback();
				}
			}
		}
	}

	/** Builds one document with stored, analyzed AGE, NAME and BRIEF fields. */
	private static Document person(String age, String name, String brief) {
		Document doc = new Document();
		doc.add(new Field("AGE", age, TextField.TYPE_STORED));
		doc.add(new Field("NAME", name, TextField.TYPE_STORED));
		doc.add(new Field("BRIEF", brief, TextField.TYPE_STORED));
		return doc;
	}

	/**
	 * Searches the BRIEF field for a fixed term and renders the top 10 hits,
	 * including each hit's score, as an HTML fragment.
	 *
	 * @return the concatenated HTML for all hits; empty when nothing matches
	 * @throws Exception if the index cannot be opened or the query cannot be parsed
	 */
	@RequestMapping("/simpleSearchScore") 
	public static String simpleSearch() throws Exception {
		String content = "中國";
		// Reader and directory are closed (in that order) when the block exits.
		try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
				DirectoryReader ireader = DirectoryReader.open(directory)) {
			IndexSearcher isearcher = new IndexSearcher(ireader);
			// Parse the query against BRIEF with the same analyzer used at index time
			QueryParser parser = new QueryParser(Version.LUCENE_47, "BRIEF", analyzer);
			Query query = parser.parse(content);

			TopDocs docs = isearcher.search(query, 10);
			StringBuilder resultStr = new StringBuilder();
			for (ScoreDoc hit : docs.scoreDocs) {
				// Load the stored fields once per hit instead of once per field
				Document stored = isearcher.doc(hit.doc);
				resultStr.append("文件ID: ").append(hit.doc)
						.append("<br/>BRIEF:").append(stored.get("BRIEF"))
						.append("<br/>NAME:").append(stored.get("NAME"))
						.append("<br/>AGE:").append(stored.get("AGE"))
						.append("<br/>得分情況: ").append(hit.score)
						.append("<hr border='5px' color='red'/>");
			}
			return resultStr.toString();
		}
	}

	/**
	 * Searches the BRIEF field for a fixed term and returns, for each of the
	 * top 10 hits, a map with "name", "age" and - when a fragment scored above
	 * zero - "brief" holding the snippet with matches wrapped in a red
	 * {@code <font>} tag.
	 *
	 * <p>When several fragments score above zero the last one wins; when none
	 * does, the "brief" key is absent from that hit's map.
	 *
	 * @return one map per hit, in the order the searcher returned them
	 * @throws Exception if the index cannot be opened, the query cannot be
	 *         parsed, or highlighting fails
	 */
	@RequestMapping("/highlighterSearch")
    public List<Map<String,String>> highlighterTesting() throws Exception{
		String content = "中國";
		// Reader and directory are closed (in that order) when the block exits.
		try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
				DirectoryReader ireader = DirectoryReader.open(directory)) {
			IndexSearcher searcher = new IndexSearcher(ireader);
			QueryParser parser = new QueryParser(Version.LUCENE_47, "BRIEF", analyzer);

			Query query = parser.parse(content);
			TopDocs hits = searcher.search(query, 10);
			SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>","</font>");
			Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
			// Limit how many characters of each text the highlighter analyzes
			highlighter.setMaxDocCharsToAnalyze(20);

			List<Map<String,String>> list = new ArrayList<Map<String,String>>();
			int item = hits.scoreDocs.length;
			System.out.println(item);
			for (int i = 0; i < item; i++) {
				System.out.println(i);
				int id = hits.scoreDocs[i].doc;
				Document doc = searcher.doc(id);

				Map<String,String> map = new HashMap<String,String>();
				map.put("name", doc.get("NAME"));

				String text = doc.get("BRIEF");
				TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "BRIEF", analyzer);
				TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
				System.out.println(frag.length);
				for (int j = 0; j < frag.length; j++) {
					System.out.println(frag[j]);
					if ((frag[j] != null) && (frag[j].getScore() > 0)) {
						String str = frag[j].toString();
						System.out.println(str);
						map.put("brief", str);
					}
				}

				map.put("age", doc.get("AGE"));
				list.add(map);
			}

			return list;
		}
	}
}

package cn.et;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Entry point of the demo: starts Spring Boot with this class as the
 * configuration source.
 */
@SpringBootApplication
public class SpringBootMain {
    /**
     * Launches the Spring application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(SpringBootMain.class);
        application.run(args);
    }
}