Solr 相似度判斷moreLikeThis
阿新 • • 發佈:2019-02-12
/**
* Fetches documents related to the document with the given id by running a Solr
* query with the MoreLikeThis ("mlt") parameters enabled, and returns the hits
* whose score is greater than 1.
*
* @param id    id of the source document; used verbatim in the query "id:" + id
* @param mindf value sent as the "mlt.mindf" request parameter
* @param mintf value sent as the "mlt.mintf" request parameter
* @param count value sent as the "mlt.count" request parameter
* @return the matching hits as LableGroup objects (id, title, url); empty when the
*         response is null; if an exception occurs, whatever was collected before it
*/
public List<LableGroup> getRelated(String id, int mindf, int mintf, int count) {
List<LableGroup> lableGroups = new ArrayList<LableGroup>();
SolrQuery solrQuery = new SolrQuery();
try {
// Select the source document by id and switch on MoreLikeThis, comparing on "title".
solrQuery.setQuery("id:" + id)
.setParam("fl", "id,title,url,score")
.setParam("mlt", "true")
.setParam("mlt.fl", "title")
.setParam("mlt.mindf", String.valueOf(mindf))
.setParam("mlt.mintf", String.valueOf(mintf))
.setParam("mlt.count", String.valueOf(count));
QueryResponse response = server.query(solrQuery);
if (null == response) return lableGroups;
// The "moreLikeThis" entry of the raw response is read as a map of document lists.
// NOTE(review): if that entry is absent the loop below would hit a null — confirm the
// MoreLikeThis component is always enabled on the target handler.
@SuppressWarnings("unchecked")
SimpleOrderedMap<SolrDocumentList> solrDocumentLists = (SimpleOrderedMap<SolrDocumentList>) response.getResponse().get("moreLikeThis");
for (int i=0; i<solrDocumentLists.size(); i++) {
SolrDocumentList solrDocumentList = solrDocumentLists.getVal(i);
for (SolrDocument doc : solrDocumentList) {
// Each field is converted with toString(); presumably a hit lacking one of these
// fields yields null here and throws — TODO confirm against the index schema.
String doc_id = doc.getFieldValue("id").toString();
String doc_title = doc.getFieldValue("title").toString();
String doc_url = doc.getFieldValue("url").toString();
String doc_score = doc.getFieldValue("score").toString();
// Similarity check: keep only hits whose score is greater than 1.
if (Double.parseDouble(doc_score) > 1) {
LableGroup lableGroup = new LableGroup();
lableGroup.setId(doc_id);
lableGroup.setTitle(doc_title);
lableGroup.setUrl(doc_url);
lableGroups.add(lableGroup);
}
}
}
} catch (Exception e) {
// Any failure is logged and not rethrown; the list built so far is returned below.
log.error("獲取相似性資料error", e);
}
return lableGroups;
// NOTE(review): this copy of the listing ends here without the method's closing brace.
* 獲取相似性資料
* @param id
* @param mindf
* @param mintf
* @param count
* @return
*/
public List<LableGroup> getRelated(String id, int mindf, int mintf, int count) {
List<LableGroup> lableGroups = new ArrayList<LableGroup>();
SolrQuery solrQuery = new SolrQuery();
try {
solrQuery.setQuery("id:" + id)
.setParam("fl", "id,title,url,score")
.setParam("mlt", "true")
.setParam("mlt.fl", "title")
.setParam("mlt.mindf", String.valueOf(mindf))
.setParam("mlt.mintf", String.valueOf(mintf))
.setParam("mlt.count", String.valueOf(count));
QueryResponse response = server.query(solrQuery);
if (null == response) return lableGroups;
@SuppressWarnings("unchecked")
SimpleOrderedMap<SolrDocumentList> solrDocumentLists = (SimpleOrderedMap<SolrDocumentList>) response.getResponse().get("moreLikeThis");
for (int i=0; i<solrDocumentLists.size(); i++) {
SolrDocumentList solrDocumentList = solrDocumentLists.getVal(i);
for (SolrDocument doc : solrDocumentList) {
String doc_id = doc.getFieldValue("id").toString();
String doc_title = doc.getFieldValue("title").toString();
String doc_url = doc.getFieldValue("url").toString();
String doc_score = doc.getFieldValue("score").toString();
// 判斷相似度
if (Double.parseDouble(doc_score) > 1) {
LableGroup lableGroup = new LableGroup();
lableGroup.setId(doc_id);
lableGroup.setTitle(doc_title);
lableGroup.setUrl(doc_url);
lableGroups.add(lableGroup);
}
}
}
} catch (Exception e) {
log.error("獲取相似性資料error", e);
}
return lableGroups;
}
注:
id:文件唯一主鍵;
fl:需要返回的欄位;
mlt:在查詢時,開啟/關閉(MoreLikeThisComponent)的布林值。
mlt.fl:根據哪些欄位判斷相似度;
mlt.mindf:最小文件頻率,所在文件的個數小於這個值的詞將不用於相似判斷;
mlt.mintf:最小分詞頻率,在單個文件中出現頻率小於這個值的詞將不用於相似判斷;
mlt.count:返回相似文章個數;