elasticsearch5.11整合ik+pinyin分詞java api
阿新 • • 發佈:2019-01-05
1.建立index並指定分詞
由於官方提供的XContentBuilder工具沒有直接寫json來的直接,這裡我就直接把setting用json建立
/**
 * Creates an index whose settings register a custom analyzer
 * "ik_pinyin_analyzer": ik_max_word tokenizer chained with the
 * "my_pinyin" (pinyin plugin) and "word_delimiter" token filters.
 *
 * @param indexName name of the index to create
 */
public static void createIndex(String indexName) {
    try {
        EsTools esTools = new EsTools();
        TransportClient client = esTools.getClient();
        // Settings are written as raw JSON; XContentBuilder would be more verbose here.
        String settingsJson =
                "{\"index\" : "
                + "{\"analysis\" : "
                + "{\"analyzer\" : "
                + "{\"ik_pinyin_analyzer\" : "
                + "{\"tokenizer\" : \"ik_max_word\", "
                + "\"filter\" : [\"my_pinyin\",\"word_delimiter\"]}},"
                + "\"filter\" : {\"my_pinyin\" : "
                + "{\"type\" : \"pinyin\", \"first_letter\" : \"prefix\",\"padding_char\" : \" \" }}}}}";
        client.admin().indices().prepareCreate(indexName).setSettings(settingsJson).get();
        System.out.println("索引建立成功");
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("索引建立失敗");
    }
}
2.建立type並設定mapping
/** * 建立型別並設定mapping * @param indexName 索引名 * @param typeName 型別名 */ public static void createMapping(String indexName,String typeName,String columnName){ EsTools esTools = new EsTools(); TransportClient client = esTools.getClient(); //建立mapping PutMappingRequest mapping = Requests.putMappingRequest(indexName).type(typeName) .source("{\""+ typeName +"\": {\"properties\": " + "{\"name\": {\"type\": \"string\",\"analyzer\": \"ik_pinyin_analyzer\"}}}}"); client.admin().indices().putMapping(mapping).actionGet(); System.out.println("mapping建立成功"); }
這裡需要提到的是:first_letter即拼音首字母,可以設定為(預設為none): prefix、append、only 和 none。如"中國"的分詞效果分別為 "zg zhong guo"、"zhong guo zg"、"zg"、"zhong guo"。
3.插入資料
/**
 * Indexes a single document with a random UUID as its id.
 *
 * Fix: dropped the redundant {@code + ""} after
 * {@code UUID.randomUUID().toString()} (it is already a String).
 *
 * @param indexName index to write into
 * @param typeName  document type
 * @param column    field name
 * @param value     field value
 */
public static void setDate(String indexName, String typeName, String column, String value) {
    try {
        EsTools esTools = new EsTools();
        TransportClient client = esTools.getClient();
        IndexResponse response = client.prepareIndex(indexName, typeName)
                .setSource(jsonBuilder()
                        .startObject().field(column, value).endObject())
                .setId(UUID.randomUUID().toString())
                .get();
        System.out.println("插入成功");
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("插入失敗");
    }
}
4.查詢並高亮顯示
/**
 * Runs a match query against the given index/type and prints every hit,
 * highlighting matched fragments of the given column with &lt;h2&gt; tags.
 *
 * Fix: the transport client created here was never closed (connection and
 * thread leak on every call); it is now released in a finally block. The
 * redundant null check on the for-each variable was also removed.
 *
 * @param index  index name
 * @param type   type name
 * @param column field to query and highlight
 * @param key    search keyword
 * @throws UnknownHostException if the ES host cannot be resolved
 */
public static void findPrint(String index, String type, String column, String key) throws UnknownHostException {
    // cluster.name must match the server-side setting.
    Settings settings = Settings.builder()
            .put("cluster.name", "my-application").build();
    TransportClient client = new PreBuiltTransportClient(settings)
            .addTransportAddress(new InetSocketTransportAddress(
                    InetAddress.getByName("127.0.0.1"), 9300));
    try {
        QueryBuilder matchQuery = QueryBuilders.matchQuery(column, key);
        HighlightBuilder hiBuilder = new HighlightBuilder();
        hiBuilder.preTags("<h2>");
        hiBuilder.postTags("</h2>");
        hiBuilder.field(column);
        // Execute the search with highlighting.
        SearchResponse response = client.prepareSearch(index)
                .setTypes(type)
                .setQuery(matchQuery)
                .highlighter(hiBuilder)
                .execute().actionGet();
        SearchHits searchHits = response.getHits();
        System.out.println("共搜到:" + searchHits.getTotalHits() + "條結果!");
        for (SearchHit hit : searchHits) {
            System.out.println("String方式列印文件搜尋內容:");
            System.out.println(hit.getSourceAsString());
            // hit is never null inside a for-each; only guard the highlight map.
            if (null != hit.getHighlightFields() && hit.getHighlightFields().size() > 0) {
                System.out.println("Map方式列印高亮內容");
                System.out.println(hit.getHighlightFields());
                System.out.println("遍歷高亮集合,列印高亮片段:");
                Text[] fragments = hit.getHighlightFields().get(column).getFragments();
                for (Text fragment : fragments) {
                    System.out.println(fragment.string());
                }
            }
        }
    } finally {
        // Release netty transport threads and sockets.
        client.close();
    }
}
呼叫
/**
 * Demo driver: create index, set mapping, index one document, then search.
 */
public static void main(String[] args) throws IOException {
    String indexName = "index3";
    String typeName = "t3";
    String columnName = "name";

    // 1. Create the index with the custom analyzer settings.
    createIndex(indexName);

    // 2. Create the type and its mapping.
    createMapping(indexName, typeName, columnName);

    // 3. Index a sample document.
    setDate(indexName, typeName, columnName, "曹操官渡大破袁紹,為吞併河北打開了勝利的大門");

    // 4. Query ("w" hits via the pinyin first-letter analysis) and print hits.
    findPrint(indexName, typeName, columnName, "w");
}