1. 程式人生 > Java 相似度判斷(餘弦相似度)

java相似度判斷(餘弦相似度)

業務邏輯:

輸入某一企業,返回跟該企業相似的企業列表。

大體思路:

1、確定用輸入企業的哪些欄位來構建相似度字典;

2、將輸入企業各欄位的具體值轉換成向量,用來計算相似度;

3、根據向量計算出的相似度值,由高到低排序。

程式碼如下:

/**
 * Enterprise similarity: returns the records most similar to the record with the given id.
 *
 * <p>Processing steps:
 * <ol>
 *   <li>Load the reference row from {@code t_hidden_danger_list} by {@code ID}.</li>
 *   <li>Load up to 8000 candidate rows from the same table.</li>
 *   <li>Build a dictionary over the selected columns with
 *       {@code RestructureUtil.dictionaries}.</li>
 *   <li>Convert the reference row and each candidate row to a vector with
 *       {@code RestructureUtil.vectorFromJson} and score every candidate against the
 *       reference with {@code RestructureUtil.similarityDegree} (presumably cosine
 *       similarity, per the surrounding article - confirm in RestructureUtil).</li>
 *   <li>Sort by score, highest first, keep at most 100 rows and page them with
 *       {@code PageUtil.getPage}.</li>
 * </ol>
 *
 * @param id value of the {@code ID} column identifying the reference record
 * @param hiddenDangerListVO supplies the requested page number and page size; a missing
 *     or non-positive value falls back to page 1 and 5 rows per page respectively
 * @return the requested page of the most similar records; a page built from an empty list
 *     when no row has the given id or the reference row cannot be vectorised
 */
@Override
public Page findAndOrderBySimilar(String id, HiddenDangerListVO hiddenDangerListVO) {
    // Upper bound on how many of the best-scoring rows are handed to the pager.
    final int maxResults = 100;

    Integer page = hiddenDangerListVO.getPage();
    Integer limit = hiddenDangerListVO.getLimit();
    if (page == null || page < 1) {
        page = 1;
    }
    // A page size of 0 is as unusable as a negative one, so it also falls back to the default.
    if (limit == null || limit < 1) {
        limit = 5;
    }

    // Shared projection for both queries, so the reference row and the candidate rows
    // always expose exactly the same (aliased) columns.
    String selectClause = "SELECT" +
            " HY,YHBW,YHLY, companyname '企業名稱', " +
            " HCRQ '核查日期', " +
            "CASE WHEN YHDJ = '1' THEN " +
            " '一般隱患' " +
            " WHEN YHDJ = '2' THEN " +
            " '重大隱患' ELSE '無型別' " +
            " END '隱患級別', " +
            "CASE WHEN ZGZT = '1' THEN " +
            " '未整改' " +
            " WHEN ZGZT = '2' THEN " +
            " '整改中' " +
            " WHEN ZGZT = '3' THEN " +
            " '已整改' ELSE '無整改狀態' " +
            " END '整改狀態', " +
            " YHMS '隱患描述', " +
            " YHDD '隱患地點'," +
            " PCRE '排查人'," +
            " CASE WHEN ZGLX = '1' THEN '立即整改' " +
            " WHEN ZGLX = '2' THEN '限期整改' " +
            " WHEN ZGLX = '3' THEN '停業停產整頓' ELSE '無整改資訊' " +
            " END '整改型別'," +
            " ZGWCRQ '整改完成時間' " +
            " FROM t_hidden_danger_list ";

    // Look the reference row up first: the id is bound as a parameter (never concatenated
    // into the SQL), and an unknown id returns early without loading the candidate rows.
    List<Map<String, Object>> targetRows =
            jdbcTemplate.queryForList(selectClause + " WHERE ID = ? ", id);
    if (targetRows.isEmpty()) {
        return PageUtil.getPage(page, limit, new ArrayList());
    }
    JSONObject target = JSONObject.fromObject(targetRows.get(0));

    List<JSONObject> candidates = new ArrayList<>();
    for (Map<String, Object> row : jdbcTemplate.queryForList(selectClause + "limit 8000 ")) {
        candidates.add(JSONObject.fromObject(row));
    }

    // Columns that feed the similarity dictionary; YHLY is passed separately as the
    // "num_column" argument of RestructureUtil.dictionaries.
    List<String> columns = new ArrayList<>();
    columns.add("HY");
    columns.add("YHBW");
    columns.add("整改型別");
    columns.add("隱患級別");
    columns.add("整改狀態");
    columns.add("隱患地點");
    List<String> numColumns = new ArrayList<>();
    numColumns.add("YHLY");

    Set<DicVO> dictionaries = RestructureUtil.dictionaries(candidates, columns, numColumns);

    List<Double> targetVector = RestructureUtil.vectorFromJson(dictionaries, target);
    if (targetVector == null) {
        // Candidate vectors may be null (see the check below), so guard the reference one too.
        return PageUtil.getPage(page, limit, new ArrayList());
    }

    // NOTE(review): scores are keyed by the row's JSONObject, so rows the map treats as
    // equal collapse into a single entry - confirm that this de-duplication is intended.
    Map<JSONObject, Double> scores = new HashMap<>();
    // Every candidate is scored, including the last one in the list.
    for (JSONObject candidate : candidates) {
        List<Double> candidateVector = RestructureUtil.vectorFromJson(dictionaries, candidate);
        if (candidateVector != null) {
            scores.put(candidate, RestructureUtil.similarityDegree(targetVector, candidateVector));
        }
    }

    // sortByComparator returns an insertion-ordered map sorted by descending score.
    Map sorted = sortByComparator(scores);

    // Kept as a raw List because the parameter type of PageUtil.getPage is not visible here.
    List listResult = new ArrayList();
    for (Object row : sorted.keySet()) {
        listResult.add(row);
        if (listResult.size() == maxResults) {
            break;
        }
    }

    return PageUtil.getPage(page, limit, listResult);
}

/**
 * Returns a copy of the given map whose entries are ordered by value, largest first.
 *
 * <p>Values are compared through their {@link Comparable} implementation; entries with
 * equal values keep the relative order in which the input map iterates them. The result
 * is a {@link LinkedHashMap}, so iterating it visits the entries in sorted order.
 *
 * @param unsortMap the map to sort; its values must be mutually comparable
 * @return a new map holding the same entries in descending value order
 */
public static Map sortByComparator(Map unsortMap) {
    // Snapshot the entries so they can be reordered without touching the input map.
    List<Map.Entry> entries = new ArrayList<Map.Entry>(unsortMap.entrySet());

    Collections.sort(entries, new Comparator<Map.Entry>() {
        @Override
        public int compare(Map.Entry left, Map.Entry right) {
            // Comparing right against left (instead of left against right) yields descending order.
            Comparable rightValue = (Comparable) right.getValue();
            return rightValue.compareTo(left.getValue());
        }
    });

    // LinkedHashMap preserves insertion order, i.e. the sorted order.
    Map ordered = new LinkedHashMap();
    for (Map.Entry entry : entries) {
        ordered.put(entry.getKey(), entry.getValue());
    }
    return ordered;
}