1. 程式人生 > >機器學習一個小目標——Task4

機器學習一個小目標——Task4

任務【模型評估】

記錄五個模型關於precision,recall,f1,auc,roc的評分表格,畫出auc和roc曲線圖

遇到難題

實驗程式碼

#!/usr/bin/env python 3.6
#-*- coding:utf-8 -*-
# @File    : Model_evaluation.py
# @Date    : 2018-11-20
# @Author  : 黑桃
# @Software: PyCharm 

import pickle

from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, roc_curve

try:
    # joblib is distributed as its own package; newer scikit-learn releases
    # no longer ship the vendored ``sklearn.externals.joblib`` copy.
    import joblib
except ImportError:
    # Fallback for old environments that only have the vendored copy.
    from sklearn.externals import joblib

path = "E:/MyPython/Machine_learning_GoGoGo/"

# =============================================================================
# 1. Load features
# =============================================================================
print("0 讀取特徵")
# NOTE: pickle.load can execute arbitrary code; only load files you produced
# yourself. The context manager guarantees the file is closed even on error.
with open(path + 'feature/feature_V1.pkl', 'rb') as f:
    train, test, y_train, y_test = pickle.load(f)

# =============================================================================
# 2. Load models
# =============================================================================
print("1 讀取模型")
SVM_linear = joblib.load(path + "model/SVM_linear.pkl")
SVM_poly = joblib.load(path + "model/SVM_poly.pkl")
SVM_rbf = joblib.load(path + "model/SVM_rbf.pkl")
SVM_sigmoid = joblib.load(path + "model/SVM_sigmoid.pkl")
lg_120 = joblib.load(path + "model/lg_120.pkl")
DT = joblib.load(path + "model/DT.pkl")
xgb_sklearn = joblib.load(path + "model/xgb_sklearn.pkl")
lgb_sklearn = joblib.load(path + "model/lgb_sklearn.pkl")
xgb = joblib.load(path + "model/xgb.pkl")
lgb = joblib.load(path + "model/lgb.pkl")


# =============================================================================
# 3. Model evaluation
# =============================================================================
def model_evalua(clf, X_train, X_test, y_train, y_test, clf_name):
    """Print evaluation metrics for one classifier and plot its ROC curves.

    Prints AUC, accuracy, recall, F1 and KS for both the train and the test
    split, then draws both ROC curves on one figure, saves it to
    ``path + 'picture/' + clf_name + '.jpg'`` and shows it.

    Args:
        clf: Fitted classifier exposing ``predict`` and ``predict_proba``.
            Column 1 of ``predict_proba`` is used as the positive-class score.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        clf_name: Name used in the plot title and the saved image file name.
    """
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    y_train_pred_proba = clf.predict_proba(X_train)[:, 1]
    y_test_pred_proba = clf.predict_proba(X_test)[:, 1]

    # AUC is a ranking metric, so it must be computed from the continuous
    # scores rather than the thresholded labels; this also keeps the printed
    # value consistent with the one shown in the plot legend.
    train_auc = roc_auc_score(y_train, y_train_pred_proba)
    test_auc = roc_auc_score(y_test, y_test_pred_proba)
    print('AUC Score')
    print("Train_AUC Score :{:.4f}".format(train_auc))
    print("Test_AUC Score :{:.4f}".format(test_auc))

    # Accuracy
    print('準確性:')
    print('Train_準確性:{:.4f}'.format(accuracy_score(y_train, y_train_pred)))
    print('Test_準確性:{:.4f}'.format(accuracy_score(y_test, y_test_pred)))

    # Recall
    print('召回率:')
    print('Train_召回率:{:.4f}'.format(recall_score(y_train, y_train_pred)))
    print('Test_召回率:{:.4f}'.format(recall_score(y_test, y_test_pred)))

    # F1 score
    print('f1_score:')
    print('Train_f1_score:{:.4f}'.format(f1_score(y_train, y_train_pred)))
    print('Test_f1_score:{:.4f}'.format(f1_score(y_test, y_test_pred)))

    # ROC curves
    fpr_tr, tpr_tr, _ = roc_curve(y_train, y_train_pred_proba)
    fpr_te, tpr_te, _ = roc_curve(y_test, y_test_pred_proba)

    # KS statistic: largest gap between TPR and FPR along the ROC curve.
    ks_train = max(abs(fpr_tr - tpr_tr))
    ks_test = max(abs(fpr_te - tpr_te))
    print('KS:')
    print('Train:{:.4f}'.format(ks_train))
    print('Test:{:.4f}'.format(ks_test))

    # Start a fresh figure so curves from a previously evaluated model cannot
    # accumulate on the same axes when the backend does not clear on show().
    plt.figure()
    plt.plot(fpr_tr, tpr_tr, 'r-',
             label="Train:AUC: {:.3f} KS:{:.3f}".format(train_auc, ks_train))
    # The test legend reports the test KS (previously it reused the train KS).
    plt.plot(fpr_te, tpr_te, 'g-',
             label="Test:AUC: {:.3f} KS:{:.3f}".format(test_auc, ks_test))
    plt.plot([0, 1], [0, 1], 'd--')  # chance-level diagonal
    plt.legend(loc='best')
    plt.title(clf_name + " ROC curve")
    # Save before show(): the figure may be gone once show() returns.
    plt.savefig(path + 'picture/' + clf_name + '.jpg')
    plt.show()


print('-------------------SVM_linear-------------------')
model_evalua(SVM_linear, train, test, y_train, y_test, 'SVM_linear')

print('-------------------SVM_poly-------------------:')
model_evalua(SVM_poly, train, test, y_train, y_test, 'SVM_poly')

print('-------------------SVM_rbf-------------------:')
model_evalua(SVM_rbf, train, test, y_train, y_test, 'SVM_rbf')

print('-------------------SVM_sigmoid-------------------:')
model_evalua(SVM_sigmoid, train, test, y_train, y_test, 'SVM_sigmoid')

print('-------------------lg_120-------------------')
model_evalua(lg_120, train, test, y_train, y_test, 'lg_120')

print('-------------------DT-------------------')
model_evalua(DT, train, test, y_train, y_test, 'DT')

print('-------------------xgb_sklearn-------------------')
model_evalua(xgb_sklearn, train, test, y_train, y_test, 'xgb_sklearn')

# NOTE(review): the `xgb` model is loaded above but not evaluated, as in the
# original script; presumably it lacks the predict_proba interface - confirm.

print('-------------------lgb_sklearn-------------------')
model_evalua(lgb_sklearn, train, test, y_train, y_test, 'lgb_sklearn')

# NOTE(review): the `lgb` model is likewise loaded but not evaluated - confirm
# whether it should be.

實驗結果

accuracy recall f1_score KS ROC_AUC ROC曲線
SVM_linear Train_準確性:0.7878 Test_準確性:0.7442 Train_召回率:0.1683 Test_召回率:0.3377 Train_f1_score:0.2781 Test_f1_score:0.4160 Train:0.4519 Test:0.2590 Train_AUC Score :0.5774 Test_AUC Score :0.6160 在這裡插入圖片描述
SVM_poly Train_準確性:0.7815 Test_準確性:0.7267 Train_召回率:0.1027 Test_召回率:0.0597 Train_f1_score:0.1859 Test_f1_score:0.1055 Train:0.7099 Test:0.3082 Train_AUC Score :0.5510 Test_AUC Score :0.5164 在這裡插入圖片描述
SVM_rbf Train_準確性:0.7971 Test_準確性:0.7589 Train_召回率:0.1894 Test_召回率:0.1455 Train_f1_score:0.3119 Test_f1_score:0.2456 Train:0.6474 Test:0.3723 Train_AUC Score :0.5907 Test_AUC Score :0.5655 在這裡插入圖片描述
SVM_sigmoid Train_準確性:0.7265 Test_準確性:0.7092 Train_召回率:0.2809 Test_召回率:0.1584 Train_f1_score:0.3328 Test_f1_score:0.2272 Train:0.2216 Test:0.1235 Train_AUC Score :0.5752 Test_AUC Score :0.5356 在這裡插入圖片描述
lg_120 Train_準確性:0.4355 Test_準確性:0.4590 Train_召回率:0.6671 Test_召回率:0.7117 Train_f1_score:0.3647 Test_f1_score:0.4152 Train:0.0695 Test:0.0907 Train_AUC Score :0.5142 Test_AUC Score :0.5387 在這裡插入圖片描述
DT Train_準確性:0.7920 Test_準確性:0.7505 Train_召回率:0.4245 Test_召回率:0.3169 Train_f1_score:0.4978 Test_f1_score:0.4067 Train:0.4126 Test:0.3524 Train_AUC Score :0.6672 Test_AUC Score :0.6138 在這裡插入圖片描述
xgb_sklearn Train_準確性:0.8452 Test_準確性:0.7765 Train_召回率:0.4691 Test_召回率:0.3065 Train_f1_score:0.5954 Test_f1_score:0.4252 Train:0.6167 Test:0.3763 Train_AUC Score :0.7175 Test_AUC Score :0.6283 在這裡插入圖片描述
lgb_sklearn Train_準確性:1.0000 Test_準確性:0.7680 Train_召回率:1.0000 Test_召回率:0.3117 Train_f1_score:1.0000 Test_f1_score:0.4203 Train:1.0000 Test:0.3761 Train_AUC Score :1.0000 Test_AUC Score :0.6242 在這裡插入圖片描述

參考文獻

ML實操 - 貸款使用者逾期情況分析
ML - 貸款使用者逾期情況分析
python matplotlib 畫圖儲存圖片簡單例子
sklearn.metrics中的評估方法介紹(accuracy_score, recall_score, roc_curve, roc_auc_score, confusion_matrix)