1. 程式人生 > >Python的treelib構建多叉樹——快速命名節點id

Python的treelib構建多叉樹——快速命名節點id

思想就是:為保證多叉樹節點的唯一性,主要就是根據巢狀list,構建對應的節點id
首先,將巢狀list中第一個list的各個元素,直接作為該list對應節點的id ;
其次,為了保證節點的命名不重複,建立一個字典來統計各個節點的出現次數 ;
若同一棵樹上有不在同一路徑上的相同子樹,怎麼知道該將這棵子樹放到哪個節點下面呢?
很簡單,就是建立臨時path,從當前list的第一個元素與之前的list相同位置元素進行比較,若第一個元素相同,則將對應的list抽出放到path,當前list節點命名與path中的同位置相同元素一樣,若不同,則將id命名為該元素與其當前計數的拼接str。

def encoding_tree_node_id(temps):
    """Build node ids for a list of root-to-node name paths.

    Each element of ``temps`` is a sequence of node names describing one
    path from a root downwards.  Two paths refer to the same tree node at
    depth ``d`` exactly when their first ``d + 1`` names are equal; such
    positions receive the same id.  A position whose name prefix has not
    been seen in an earlier path gets a new id.

    Id naming rules:

    * Names in the first path are used unchanged as ids.
    * For every later path, the occurrence counter of each of its names is
      incremented first (once per appearance); a newly created node is then
      named ``name + str(counter[name])``.

    Args:
        temps: Sequence of paths; each path is a sequence of hashable
            names.  Names in every path after the first must support
            ``name + str(...)`` (i.e. be strings) when a new id is built.

    Returns:
        A new list of lists with the same shape as ``temps`` holding the
        node ids.  An empty ``temps`` yields ``[]`` and an empty path
        yields an empty id list.  The result never shares list objects
        with ``temps``.

    Example:
        >>> encoding_tree_node_id([['a', 'b'], ['c', 'b'], ['a', 'd']])
        [['a', 'b'], ['c1', 'b2'], ['a', 'd1']]
    """
    temps_id = []
    # Occurrence counter per name; its current value is the suffix that a
    # newly created node with that name receives.
    id_count_dict = {}
    # Prefix trie: name -> (node id, children); one level per path depth.
    # Looking a path up here replaces rescanning all earlier paths.
    trie = {}

    for index, path in enumerate(temps):
        if index == 0:
            # Every name of the first path starts with a count of 1, even
            # when it is repeated inside that path.
            id_count_dict = dict.fromkeys(path, 1)
        else:
            # Counts are bumped for the whole path before any id is built,
            # so the suffix reflects this path's own occurrences as well.
            for name in path:
                id_count_dict[name] = id_count_dict.get(name, 0) + 1

        children = trie
        path_ids = []
        for name in path:
            entry = children.get(name)
            if entry is None:
                # Unseen prefix: create the node.  The trie is prefix
                # closed, so every deeper position of this path is new too.
                if index == 0:
                    node_id = name
                else:
                    node_id = name + str(id_count_dict[name])
                entry = (node_id, {})
                children[name] = entry
            path_ids.append(entry[0])
            children = entry[1]
        temps_id.append(path_ids)

    return temps_id

例如:

>>> temps = [
['sa7','sa2','sa3','sa5'],
['sa1','sa3','sa5'],
['sa1','sa3','sa6'],
['sa1','sa5'],
['sa7','sa2','sa3','sa6'],
['sa7','sa2','sa4'],
['sa1','sa3','sa7','sa8'],
['sa1','sa5','sa8']
]
>>> encoding_tree_node_id(temps)
[['sa7', 'sa2', 'sa3', 'sa5'], ['sa11', 'sa32', 'sa52'], ['sa11', 'sa32', 'sa61'], ['sa11', 'sa53'], ['sa7', 'sa2', 'sa3', 'sa62'], ['sa7', 'sa2', 'sa41'], ['sa11', 'sa32', 'sa74', 'sa81'], ['sa11', 'sa53', 'sa82']]