1. 程式人生 > java中solr全文檢索的使用

java中solr全文檢索的使用

  •  採用SolrInputDocument物件增加、刪除索引

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

public class AddDocsDemo {
public static final String SOLR_URL = "http://172.168.63.233:8983/solr"
; public static void main(String[] args) { //通過瀏覽器檢視結果 //http://172.168.63.233:8983/solr/collection1/select?q=name%3A%E6%94%B9%E9%9D%A9&wt=json&indent=true //AddDocs(); delDocs(); } public static void AddDocs() { String[] words = { "中央全面深化改革領導小組", "第四次會議", "審議了國企薪酬制度改革", "考試招生制度改革", "傳統媒體與新媒體融合等", "相關內容檔案"
, "習近平強調要", "逐步規範國有企業收入分配秩序", "實現薪酬水平適當", "結構合理、管理規範、監督有效", "對不合理的偏高", "過高收入進行調整", "深化考試招生制度改革", "總的目標是形成分類考試", "綜合評價", "多元錄取的考試招生模式", "健全促進公平", "科學選才", "監督有力的體制機制", "著力打造一批形態多樣", "手段先進", "具有競爭力的新型主流媒體", "建成幾家擁有強大實力和傳播力", "公信力", "影響力的新型媒體集團" }; long start = System.currentTimeMillis(); Collection<SolrInputDocument> docs = new
ArrayList<SolrInputDocument>(); for (int i = 1; i < 300; i++) { SolrInputDocument doc1 = new SolrInputDocument(); doc1.addField("id", "id" + i, 1.0f); doc1.addField("name", words[i % 21], 1.0f); doc1.addField("price", 10 * i); docs.add(doc1); } try { HttpSolrServer server = new HttpSolrServer(SOLR_URL); // 可以通過三種方式增加docs,其中server.add(docs.iterator())效率最高 // 增加後通過執行commit函式commit (936ms) // server.add(docs); // server.commit(); // 增加doc後立即commit (946ms) // UpdateRequest req = new UpdateRequest(); // req.setAction(ACTION.COMMIT, false, false); // req.add(docs); // UpdateResponse rsp = req.process(server); // the most optimal way of updating all your docs // in one http request(432ms) server.add(docs.iterator()); } catch (Exception e) { System.out.println(e); } System.out.println("time elapsed(ms):" + (System.currentTimeMillis() - start)); } public static void delDocs() { long start = System.currentTimeMillis(); try { HttpSolrServer server = new HttpSolrServer(SOLR_URL); List<String> ids = new ArrayList<String>(); for (int i = 1; i < 300; i++) { ids.add("id" + i); } server.deleteById(ids); server.commit(); } catch (Exception e) { System.out.println(e); } System.out.println("time elapsed(ms):" + (System.currentTimeMillis() - start)); } }
  •  採用POJOs增加、刪除索引
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collection;
    import java.util.List;
    import java.util.Random;
    
    import org.apache.solr.client.solrj.beans.DocumentObjectBinder;
    import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.common.SolrInputDocument;
    
    /**
     * Demo: adds and deletes Solr index entries using annotated POJOs
     * ({@code NewsBean}) sent through an {@link HttpSolrServer}.
     */
    public class AddBeansDemo {
        /** Base URL of the Solr instance used by this demo. */
        public static final String SOLR_URL = "http://172.168.63.233:8983/solr";

        /**
         * Entry point. Currently only adds the sample beans; enable the
         * {@code delBeans()} call to remove them again.
         */
        public static void main(String[] args) {
            // Check the result in a browser.
            // Every bean property name must exist as a field in conf/schema.xml;
            // to be queryable a field has to be stored/indexed.
            // http://172.168.63.233:8983/solr/collection1/select?q=description%3A%E6%94%B9%E9%9D%A9&wt=json&indent=true
            // delBeans();
            AddBeans();
        }

        /** Fixed-seed generator, so every run produces the same sample data. */
        public static Random rand = new Random(47);
        /** Pool of sample author names. */
        public static String[] authors = { "張三", "李四", "王五", "趙六", "張飛", "劉備",
                "關雲長" };
        /** Pool of sample related links. */
        public static String[] links = {
                "http://repository.sonatype.org/content/sites/forge-sites/m2e/",
                "http://news.ifeng.com/a/20140818/41626965_0.shtml",
                "http://news.ifeng.com/a/20140819/41631363_0.shtml?wratingModule_1_9_1",
                "http://news.ifeng.com/topic/19382/",
                "http://news.ifeng.com/topic/19644/" };

        /**
         * Picks a random-length prefix of {@link #authors} (possibly empty,
         * never the full array) and joins it into one string.
         *
         * @return the chosen names, each preceded by a single space; the
         *         empty string when no name was chosen
         */
        public static String genAuthors() {
            // Bound derived from the array instead of a literal, so the pool
            // can change size without going out of range.
            List<String> chosen = Arrays.asList(authors).subList(0, rand.nextInt(authors.length));
            StringBuilder joined = new StringBuilder();
            for (String author : chosen) {
                joined.append(' ').append(author);
            }
            return joined.toString();
        }

        /**
         * Picks a random-length prefix of {@link #links} (possibly empty,
         * never the full array).
         *
         * @return a view onto the first few entries of the link pool
         */
        public static List<String> genLinks() {
            return Arrays.asList(links).subList(0, rand.nextInt(links.length));
        }

        /**
         * Builds the sample beans "id1" .. "id299", sends them to Solr in one
         * request, optimizes the index, and prints the elapsed time.
         * Failures are reported on stdout and do not propagate.
         */
        public static void AddBeans() {
            String[] words = { "中央全面深化改革領導小組", "第四次會議", "審議了國企薪酬制度改革", "考試招生制度改革",
                    "傳統媒體與新媒體融合等", "相關內容檔案", "習近平強調要", "逐步規範國有企業收入分配秩序",
                    "實現薪酬水平適當", "結構合理、管理規範、監督有效", "對不合理的偏高", "過高收入進行調整",
                    "深化考試招生制度改革", "總的目標是形成分類考試", "綜合評價", "多元錄取的考試招生模式", "健全促進公平",
                    "科學選才", "監督有力的體制機制", "著力打造一批形態多樣", "手段先進", "具有競爭力的新型主流媒體",
                    "建成幾家擁有強大實力和傳播力", "公信力", "影響力的新型媒體集團" };

            long start = System.currentTimeMillis();
            Collection<NewsBean> docs = new ArrayList<NewsBean>();
            // Alternative (not used): convert each bean explicitly with
            //   new DocumentObjectBinder().toSolrInputDocument(news)
            for (int i = 1; i < 300; i++) {
                NewsBean news = new NewsBean();
                news.setId("id" + i);
                news.setName("news" + i);
                news.setAuthor(genAuthors());
                // Cycle through ALL sample phrases; the previous hard-coded
                // modulus (21) never reached the last entries of the array.
                news.setDescription(words[i % words.length]);
                news.setRelatedLinks(genLinks());
                docs.add(news);
            }

            HttpSolrServer server = null;
            try {
                server = new HttpSolrServer(SOLR_URL);
                server.setRequestWriter(new BinaryRequestWriter());
                // Beans can be added in two ways; per the author's measurements
                // server.addBeans(docs.iterator()) is the faster one.
                //
                // 1) add, then call commit explicitly (981ms):
                //      server.addBeans(docs);
                //      server.commit();
                //
                // 2) stream all beans in one HTTP request (481ms):
                server.addBeans(docs.iterator());
                server.optimize(); // time elapsed 1176ms
            } catch (Exception e) {
                System.out.println(e);
            } finally {
                if (server != null) {
                    server.shutdown(); // release the underlying HTTP connections
                }
            }
            System.out.println("time elapsed(ms):"
                    + (System.currentTimeMillis() - start));
        }

        /**
         * Deletes the documents "id1" .. "id299" by id, commits, and prints
         * the elapsed time. Failures are reported on stdout and do not propagate.
         */
        public static void delBeans() {
            long start = System.currentTimeMillis();
            HttpSolrServer server = null;
            try {
                server = new HttpSolrServer(SOLR_URL);
                List<String> ids = new ArrayList<String>();
                for (int i = 1; i < 300; i++) {
                    ids.add("id" + i);
                }
                server.deleteById(ids);
                server.commit();
            } catch (Exception e) {
                System.out.println(e);
            } finally {
                if (server != null) {
                    server.shutdown(); // release the underlying HTTP connections
                }
            }
            System.out.println("time elapsed(ms):"
                    + (System.currentTimeMillis() - start));
        }
    }
    import java.util.List;
    
    import org.apache.solr.client.solrj.beans.Field;
    
    /**
     * Plain bean describing one news entry. Each property carries SolrJ's
     * {@code @Field} annotation; {@code relatedLinks} is explicitly bound to
     * the Solr field named "links".
     */
    class NewsBean {
        /** Document id. */
        @Field
        private String id;

        /** Title of the news entry. */
        @Field
        private String name;

        /** Author name(s) as a single string. */
        @Field
        private String author;

        /** Free-text description. */
        @Field
        private String description;

        /** Related URLs; bound to the Solr field "links". */
        @Field("links")
        private List<String> relatedLinks;

        /** Creates an empty bean; all properties start out as {@code null}. */
        public NewsBean() {
        }

        /** @return the document id */
        public String getId() {
            return this.id;
        }

        /** @param newId the document id to store */
        public void setId(String newId) {
            id = newId;
        }

        /** @return the title */
        public String getName() {
            return this.name;
        }

        /** @param newName the title to store */
        public void setName(String newName) {
            name = newName;
        }

        /** @return the author string */
        public String getAuthor() {
            return this.author;
        }

        /** @param newAuthor the author string to store */
        public void setAuthor(String newAuthor) {
            author = newAuthor;
        }

        /** @return the description */
        public String getDescription() {
            return this.description;
        }

        /** @param newDescription the description to store */
        public void setDescription(String newDescription) {
            description = newDescription;
        }

        /** @return the related links (the stored list itself, not a copy) */
        public List<String> getRelatedLinks() {
            return this.relatedLinks;
        }

        /** @param newLinks the related links; the reference is stored as-is */
        public void setRelatedLinks(List<String> newLinks) {
            relatedLinks = newLinks;
        }
    }
  •     普通方式處理查詢結果
    import java.io.IOException;
    
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrQuery.ORDER;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.impl.XMLResponseParser;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocument;
    
    /**
     * Demo: runs a query against Solr and prints the raw {@link SolrDocument}
     * results field by field.
     */
    public class QueryDocsDemo {
        // public static final String SOLR_URL = "http://192.168.230.128:8983/solr";
        /** Base URL of the Solr instance used by this demo. */
        public static final String SOLR_URL = "http://172.168.63.233:8983/solr";

        /**
         * Configures the client, queries for "video" (first 2 hits sorted by
         * ascending price) and prints id, name, price and score of each hit.
         *
         * @throws SolrServerException if the query cannot be executed
         * @throws IOException declared by the original signature
         */
        public static void main(String[] args) throws SolrServerException, IOException {
            HttpSolrServer server = new HttpSolrServer(SOLR_URL);
            try {
                server.setMaxRetries(1); // defaults to 0. > 1 not recommended.
                server.setConnectionTimeout(5000); // 5 seconds to establish TCP
                // Normally none of the following settings are required.
                // When an older solrj talks to a newer Solr the javabin formats
                // are incompatible, so an explicit parser has to be set.
                server.setParser(new XMLResponseParser());
                server.setSoTimeout(1000); // socket read timeout
                server.setDefaultMaxConnectionsPerHost(100);
                server.setMaxTotalConnections(100);
                server.setFollowRedirects(false); // defaults to false
                // allowCompression defaults to false.
                // Server side must support gzip or deflate for this to have any effect.
                server.setAllowCompression(true);

                // Alternative: pass the parameters with ModifiableSolrParams,
                // mirroring this URL:
                //   192.168.230.128:8983/solr/select?q=video&fl=id,name,price&sort=price asc&start=0&rows=2&wt=json
                //
                //   ModifiableSolrParams params = new ModifiableSolrParams();
                //   params.set("q", "video");                  // search keyword
                //   params.set("fl", "id,name,price,score");   // returned fields
                //   params.set("sort", "price asc");           // ordering
                //   // paging: start is the offset of the first hit, rows the page size;
                //   // the next page is fetched by advancing start by rows
                //   params.set("start", 2);
                //   params.set("rows", 2);
                //   params.set("wt", "javabin");               // response format
                //   QueryResponse response = server.query(params);

                // SolrQuery wraps the same parameters behind typed setters.
                server.setRequestWriter(new BinaryRequestWriter());
                SolrQuery query = new SolrQuery();
                query.setQuery("video");
                query.setFields("id", "name", "price", "score");
                query.setSort("price", ORDER.asc);
                query.setStart(0);
                query.setRows(2);
                // query.setRequestHandler("/select");
                QueryResponse response = server.query(query);

                // Total number of matches.
                System.out.println("Find:" + response.getResults().getNumFound());
                // Print each returned document. String concatenation is used
                // instead of toString() so a document that lacks one of the
                // requested fields prints "null" rather than throwing an NPE.
                int row = 1;
                for (SolrDocument doc : response.getResults()) {
                    System.out.println("----------" + row + "------------");
                    System.out.println("id: " + doc.getFieldValue("id"));
                    System.out.println("name: " + doc.getFieldValue("name"));
                    System.out.println("price: " + doc.getFieldValue("price"));
                    System.out.println("score: " + doc.getFieldValue("score"));
                    row++;
                }
            } finally {
                server.shutdown(); // release the underlying HTTP connections
            }
        }
    }
  •     採用POJOs方式處理查詢結果
    import java.io.IOException;
    import java.util.List;
    
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrQuery.ORDER;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.beans.DocumentObjectBinder;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.response.FacetField;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocument;
    import org.apache.solr.common.SolrDocumentList;
    
    /**
     * Demo: runs a faceted query against Solr, maps the hits to
     * {@code NewsBean} objects, and prints hits and facet counts.
     */
    public class QueryBeanDemo {
        /** Base URL of the Solr instance used by this demo. */
        public static final String SOLR_URL = "http://172.168.63.233:8983/solr";

        /**
         * Queries {@code description:改革} (first 2 hits, facet on
         * {@code author_s}), prints the bean ids, the raw documents and the
         * facet fields.
         *
         * @throws SolrServerException if the query cannot be executed
         * @throws IOException declared by the original signature
         */
        public static void main(String[] args) throws SolrServerException,
                IOException {
            // http://172.168.63.233:8983/solr/collection1/select?q=description%3A%E6%80%BB%E7%9B%AE%E6%A0%87&facet=true&facet.field=author_s
            HttpSolrServer server = new HttpSolrServer(SOLR_URL);
            try {
                server.setMaxRetries(1); // defaults to 0. > 1 not recommended.
                server.setConnectionTimeout(5000); // 5 seconds to establish TCP
                // server.setRequestWriter(new BinaryRequestWriter());

                SolrQuery query = new SolrQuery();
                query.setQuery("description:改革");
                query.setStart(0);
                query.setRows(2);
                query.setFacet(true);
                query.addFacetField("author_s");

                QueryResponse response = server.query(query);
                // Total number of matches.
                System.out.println("Find:" + response.getResults().getNumFound());

                // Author's note: response.getBeans has a bug -- the Field type
                // referenced by DocumentObjectBinder should be
                // org.apache.solr.client.solrj.beans.Field.
                // NOTE(review): DocumentObjectBinderL is not defined in the
                // visible source; presumably the author's patched copy of
                // DocumentObjectBinder -- confirm it is on the classpath.
                SolrDocumentList list = response.getResults();
                DocumentObjectBinderL binder = new DocumentObjectBinderL();
                List<NewsBean> beanList = binder.getBeans(NewsBean.class, list);
                for (NewsBean news : beanList) {
                    System.out.println(news.getId());
                }

                // Print each raw document. String concatenation is used
                // instead of toString() so a document that lacks a field
                // prints "null" rather than throwing an NPE.
                int row = 1;
                for (SolrDocument doc : list) {
                    System.out.println("----------" + row + "------------");
                    System.out.println("id: " + doc.getFieldValue("id"));
                    System.out.println("name: " + doc.getFieldValue("name"));
                    row++;
                }

                // The facet list may be null when the response carries no
                // facet section, so guard before iterating.
                List<FacetField> facetFields = response.getFacetFields();
                if (facetFields != null) {
                    for (FacetField ff : facetFields) {
                        System.out.println(ff.getName() + "," + ff.getValueCount() + ","
                                + ff.getValues());
                    }
                }
            } finally {
                server.shutdown(); // release the underlying HTTP connections
            }
        }
    }