第十五週(sklearn)
阿新 • • 發佈:2018-12-06
首先是建立資料集和split 資料集
# Build a synthetic binary-classification dataset and a 10-fold CV split.
#
# FIX: `sklearn.cross_validation` was deprecated in 0.18 and removed in
# 0.20; `sklearn.model_selection.KFold` is the replacement.  Its API also
# changed: the object is no longer directly iterable — `.split(X)` yields
# the (train_index, test_index) pairs.  The splits are materialized into a
# list so the later `for train_i, test_i in kf:` loops keep working
# unchanged (and so the same folds are reused across all three models).
import sklearn
from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# 1000 samples x 10 features; make_classification returns (X, y).
dataset = datasets.make_classification(n_samples=1000, n_features=10)
data = dataset[0]
target = dataset[1]

# 10 shuffled folds, frozen as a list of (train_index, test_index) pairs.
kf = list(KFold(n_splits=10, shuffle=True).split(data))
然後是Gaussian NB 演算法訓練
# Gaussian NB: fit and score the classifier on each of the 10 CV folds,
# reporting accuracy, F1 and ROC AUC per fold.
fold = 1
for train_idx, test_idx in kf:
    x_train, y_train = data[train_idx], target[train_idx]
    x_test, y_test = data[test_idx], target[test_idx]
    model = GaussianNB()
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    print("Group:", fold)
    fold += 1
    print("Accuracy:", metrics.accuracy_score(y_test, predictions))
    print("F1-score:", metrics.f1_score(y_test, predictions))
    print("AUC ROC:", metrics.roc_auc_score(y_test, predictions))
SVC
# SVC with an RBF kernel (gamma=0.1), sweeping the regularization
# strength C over five orders of magnitude; each C is evaluated on every
# CV fold with accuracy, F1 and ROC AUC.
for c in [1e-02, 1e-01, 1e00, 1e01, 1e02]:
    fold = 1
    for train_idx, test_idx in kf:
        x_train, y_train = data[train_idx], target[train_idx]
        x_test, y_test = data[test_idx], target[test_idx]
        model = SVC(C=c, kernel='rbf', gamma=0.1)
        model.fit(x_train, y_train)
        predictions = model.predict(x_test)
        print("Group:", fold)
        fold += 1
        print("C = ", c)
        print("Accuracy:", metrics.accuracy_score(y_test, predictions))
        print("F1-score:", metrics.f1_score(y_test, predictions))
        print("AUC ROC:", metrics.roc_auc_score(y_test, predictions))
輸出較長,這裡不全部給出截圖
RandomForestClassifier
# Random forests of increasing size (10, 100, 1000 trees); each setting
# is evaluated on every CV fold with accuracy, F1 and ROC AUC.
for n in [10, 100, 1000]:
    fold = 1
    for train_idx, test_idx in kf:
        x_train, y_train = data[train_idx], target[train_idx]
        x_test, y_test = data[test_idx], target[test_idx]
        model = RandomForestClassifier(n_estimators=n)
        model.fit(x_train, y_train)
        predictions = model.predict(x_test)
        print("Group:", fold)
        fold += 1
        print("n_estimators = ", n)
        print("Accuracy:", metrics.accuracy_score(y_test, predictions))
        print("F1-score:", metrics.f1_score(y_test, predictions))
        print("AUC ROC:", metrics.roc_auc_score(y_test, predictions))
輸出較長,這裡不全部給出截圖
結果:RandomForestClassifier 的準確度比較好,而Gaussian NB的準確度差一點,SVC的準確度波動較大