1. 程式人生 > sklearn中kmeans聚類分析常用命令

sklearn中kmeans聚類分析常用命令

from sklearn.cluster import KMeans
from sklearn.externals import joblib
import numpy

# Load the tab-separated samples. The context manager guarantees the file
# handle is closed as soon as reading finishes, even if an error is raised
# while reading.
with open('c:/test/final.dat', 'r') as final:
    # Blank / whitespace-only lines are skipped: they would otherwise yield
    # an empty feature row and make the feature matrix ragged.
    data = [line.strip().split('\t') for line in final if line.strip()]

# Columns 0-2 are not used as features; every column from index 3 onwards is
# parsed as a float.
feature = [[float(x) for x in row[3:]] for row in data]

# Build a k-means estimator with 9 clusters and fit it on the feature rows.
# NOTE: single-argument print(...) is used throughout because it produces the
# same output on Python 2 and Python 3, whereas the `print x` statement form
# is a syntax error on Python 3.
clf = KMeans(n_clusters=9)
s = clf.fit(feature)
# Show what fit() returned.
print(s)

# The 9 cluster centers.
print(clf.cluster_centers_)


# The cluster each sample was assigned to.
print(clf.labels_)

# Used to judge whether the number of clusters is appropriate: the smaller
# the distance, the better the clustering; pick the cluster count at the
# critical point.
print(clf.inertia_)

# Run prediction on the same rows the model was fitted on.
print(clf.predict(feature))

# Save the model to disk; the path is named once so the dump and the load
# below are guaranteed to refer to the same file.
model_path = 'c:/km.pkl'
joblib.dump(clf, model_path)

# Load the saved model back, rebinding `clf` to the loaded object.
# NOTE(review): joblib files are pickle-based per the joblib docs - only load
# files that come from a trusted source.
clf = joblib.load(model_path)