機器學習之決策樹----python實現
阿新 • • 發佈:2018-12-17
# -*- coding: utf-8 -*-
"""Decision-tree classification demo.

Reads samples from ``file_fac_abe.txt`` (one sample per line:
comma-separated integer features with the class label as the last token),
trains an entropy-criterion decision tree, and prints feature importances,
test accuracy, and a per-class classification report.
"""
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
# BUG FIX: sklearn.cross_validation was deprecated in 0.18 and removed in
# scikit-learn 0.20; train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

plt.switch_backend('agg')  # headless backend: no display required

# Data loading: integer feature columns, string class label in the last column.
data = []
labels = []
with open("file_fac_abe.txt") as ifile:
    for line in ifile:
        tokens = line.strip().split(',')
        data.append([int(tk) for tk in tokens[:-1]])
        labels.append(tokens[-1])
x = np.array(data)
labels = np.array(labels)
y = np.zeros(labels.shape)
print('x:', x)
print(len(x))
print('labels:', labels)
print(len(labels))
print('y:', y)
print(len(y))

# Convert string labels to a binary target: 'LCS' -> 1, everything else -> 0.
y[labels == 'LCS'] = 1
print('y:', y)

# Split into training and test sets (80/20; fixed seed for reproducibility).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)

# Train a decision tree using information entropy as the split criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf.fit(x_train, y_train)
print('clf:', clf)

# Optionally export the fitted tree for Graphviz rendering:
#   with open("DT_fac_abe_tree.dot", 'w') as f:
#       tree.export_graphviz(clf, out_file=f)

# Feature importances: larger values mean the feature contributed more
# to the tree's splits.
print('clf.feature_importances_:', clf.feature_importances_)

# Test-set predictions and raw accuracy.
answer = clf.predict(x_test)
print('x_test:', x_test)
print('answer:', answer)
print('y_test:', y_test)
print('np.mean(answer==y_test):', np.mean(answer == y_test))

# BUG FIX: precision_recall_curve requires a continuous score for the
# positive class, not hard 0/1 predictions (the original passed
# clf.predict, which yields a degenerate two-point curve).
scores = clf.predict_proba(x_test)[:, 1]
precision, recall, thresholds = precision_recall_curve(y_test, scores)

# BUG FIX: classification_report requires discrete predicted labels; the
# original passed predict_proba output (floats in [0, 1]), which is invalid.
print(classification_report(y_test, answer, target_names=['Non_LCS', 'LCS']))
檔案為“file_fac_abe.txt”,其格式為:
得到結果如下:
(train_set_model)
(test_results):