1. 程式人生 > >轉:完整的最簡單的譜聚類python代碼

轉:完整的最簡單的譜聚類python代碼

得到 for details -c 簡單的 enum off ads odin

http://blog.csdn.net/waleking/article/details/7584084

針對karate_club數據集,做了譜聚類。由於是2-way clustering,比較簡單:得到圖的新的表示空間之後,沒有做k-means,僅僅針對正規化後的拉普拉斯矩陣的第二小特徵值所對應的特徵向量做了符號判斷(分量大於0的節點歸為一類,其餘歸為另一類),這和Spectral Clustering Tutorial 一文中的描述一致。

引用了numpy scipy matplotlib networkx包

  1. #coding=utf-8
  2. #MSC means Multiple Spectral Clustering
  3. import numpy as np
  4. import scipy as sp
  5. import scipy.linalg as linalg
  6. import networkx as nx
  7. import matplotlib.pyplot as plt
  8. def getNormLaplacian(W):
  9. """input matrix W=(w_ij)
  10. "compute D=diag(d1,...dn)
  11. "and L=D-W
  12. "and Lbar=D^(-1/2)LD^(-1/2)
  13. "return Lbar
  14. """
  15. d=[np.sum(row) for row in W]
  16. D=np.diag(d)
  17. L=D-W
  18. #Dn=D^(-1/2)
  19. Dn=np.power(np.linalg.matrix_power(D,-1),0.5)
  20. Lbar=np.dot(np.dot(Dn,L),Dn)
  21. return Lbar
  22. def getKSmallestEigVec(Lbar,k):
  23. """input
  24. "matrix Lbar and k
  25. "return
  26. "k smallest eigen values and their corresponding eigen vectors
  27. """
  28. eigval,eigvec=linalg.eig(Lbar)
  29. dim=len(eigval)
  30. #查找前k小的eigval
  31. dictEigval=dict(zip(eigval,range(0,dim)))
  32. kEig=np.sort(eigval)[0:k]
  33. ix=[dictEigval[k] for k in kEig]
  34. return eigval[ix],eigvec[:,ix]
  35. def checkResult(Lbar,eigvec,eigval,k):
  36. """
  37. "input
  38. "matrix Lbar and k eig values and k eig vectors
  39. "print norm(Lbar*eigvec[:,i]-lamda[i]*eigvec[:,i])
  40. """
  41. check=[np.dot(Lbar,eigvec[:,i])-eigval[i]*eigvec[:,i] for i in range(0,k)]
  42. length=[np.linalg.norm(e) for e in check]/np.spacing(1)
  43. print("Lbar*v-lamda*v are %s*%s" % (length,np.spacing(1)))
  44. g=nx.karate_club_graph()
  45. nodeNum=len(g.nodes())
  46. m=nx.to_numpy_matrix(g)
  47. Lbar=getNormLaplacian(m)
  48. k=2
  49. kEigVal,kEigVec=getKSmallestEigVec(Lbar,k)
  50. print("k eig val are %s" % kEigVal)
  51. print("k eig vec are %s" % kEigVec)
  52. checkResult(Lbar,kEigVec,kEigVal,k)
  53. #跳過k means,用最簡單的符號判別的方法來求點的歸屬
  54. clusterA=[i for i in range(0,nodeNum) if kEigVec[i,1]>0]
  55. clusterB=[i for i in range(0,nodeNum) if kEigVec[i,1]<0]
  56. #draw graph
  57. colList=dict.fromkeys(g.nodes())
  58. for node,score in colList.items():
  59. if node in clusterA:
  60. colList[node]=0
  61. else:
  62. colList[node]=0.6
  63. plt.figure(figsize=(8,8))
  64. pos=nx.spring_layout(g)
  65. nx.draw_networkx_edges(g,pos,alpha=0.4)
  66. nx.draw_networkx_nodes(g,pos,nodelist=colList.keys(),
  67. node_color=colList.values(),
  68. cmap=plt.cm.Reds_r)
  69. nx.draw_networkx_labels(g,pos,font_size=10,font_family=‘sans-serif‘)
  70. plt.axis(‘off‘)
  71. plt.title("karate_club spectral clustering")
  72. plt.savefig("spectral_clustering_result.png")
  73. plt.show()


所得聚類結果:

技術分享圖片

感謝python社區!

life is short, use python!

轉:完整的最簡單的譜聚類python代碼