1. 程式人生 > 譜聚類python實踐

譜聚類python實踐

xlabel 部分 sample view from res fit float enumerate

技術分享

聚類後:

技術分享

# -*- coding: utf-8 -*-
"""
Created on 09 05 2017

@author: similarface
"""
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
from sklearn import datasets
from sklearn import metrics
from sklearn.cluster import SpectralClustering

# Generate 500 samples with 3 features around 6 centers. cluster_std gives
# the spread of each center's blob, and random_state fixes the seed so the
# same data set is produced on every run.
X, y = datasets.make_blobs(
    n_samples=500,
    n_features=3,
    centers=6,
    cluster_std=[1.4, 0.3, 1.4, 0.3, 0.4, 0.9],
    random_state=11
)
xx, yy, zz = X[:, 0], X[:, 1], X[:, 2]
# Create a 3-D axes to draw on.
ax = plt.subplot(111, projection='3d')
# Plot every raw sample in a single colour; no cluster labels are used yet.
ax.scatter(xx, yy, zz, c='y')
# Axis labels.
ax.set_zlabel('Z')
ax.set_ylabel('Y')
ax.set_xlabel('X')
plt.show()

# Cluster the samples into 5 groups (the data above was generated with 6
# centers) using spectral clustering with gamma=0.1.
y_pred = SpectralClustering(n_clusters=5, gamma=0.1).fit_predict(X)
fig = plt.figure()
# Create the 3-D axes through the figure so it is attached to it. A bare
# p3.Axes3D(fig) is not added to the figure automatically on recent
# Matplotlib releases, which leaves the window empty.
ax = fig.add_subplot(111, projection='3d')
ax.view_init(7, -80)
# The colour-scale denominator is the same for every label, so compute it once.
color_scale = np.max(y_pred + 1)
for label in np.unique(y_pred):
    # Boolean mask selecting the samples assigned to this cluster.
    members = y_pred == label
    ax.scatter(X[members, 0], X[members, 1], X[members, 2],
               color=plt.cm.jet(float(label) / color_scale),
               s=20, edgecolor='k')
plt.title(u'譜聚類')
plt.show()


# Sweep the number of clusters against the RBF gamma parameter; the
# combination with the highest Calinski-Harabasz score is taken as the best.
# NOTE(review): the snippet below is kept disabled. It uses the Python 2
# print statement, and newer scikit-learn releases appear to spell the metric
# metrics.calinski_harabasz_score -- confirm against the installed version
# before re-enabling.
# for index, gamma in enumerate((0.01,0.1,1,10)):
#     for index, k in enumerate((3,4,5,6)):
#         y_pred = SpectralClustering(n_clusters=k, gamma=gamma).fit_predict(X)
#         print "Calinski-Harabasz Score with gamma=", gamma, "n_clusters=", k,"score:", metrics.calinski_harabaz_score(X, y_pred)

'''
http://www.cnblogs.com/pinard/p/6221564.html
'''

譜聚類python實踐