1. 程式人生 > >利用邏輯迴歸,決策樹,svm計算準確率和AUC值

利用邏輯迴歸,決策樹,svm計算準確率和AUC值

利用邏輯迴歸,決策樹,svm計算準確率和AUC值

  • 匯入的包
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.preprocessing import label_binarize
  • 讀取資料
# Load the full dataset; the CSV file is decoded as GBK.
data_all = pd.read_csv(
    '/home/infisa/wjht/project/DataWhale/data_all.csv',
    encoding='gbk',
)
  • 劃分資料集
# Split into training and test sets: every column except 'status' is a
# feature, 'status' is the label, and 30% of the rows are held out for
# testing with a fixed seed.
features = [col for col in data_all.columns if col != 'status']
X, y = data_all[features], data_all['status']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2018,
)
  • 構建模型
# Fit the three classifiers on the training split, each constructed with
# no explicit hyperparameters. fit() returns the estimator itself, so
# construction and training can be chained.
lr = LogisticRegression().fit(X_train, y_train)  # logistic regression model

tr = DecisionTreeClassifier().fit(X_train, y_train)  # decision tree model

svm = SVC().fit(X_train, y_train)  # SVM model

  • 模型評分

# Model scoring: evaluate each fitted classifier on the held-out test
# split with its score() method and print every result with a label.
# Previously only lr_score was printed; the other two were computed and
# silently discarded.
lr_score = lr.score(X_test, y_test)
print('lr_score:{}'.format(lr_score))
# Recorded output of the original run: lr_score:0.7484232655921513

tr_score = tr.score(X_test, y_test)
print('tr_score:{}'.format(tr_score))
# Recorded output of the original run: tr_score:0.6797477224947442

svm_score = svm.score(X_test, y_test)
print('svm_score:{}'.format(svm_score))
# Recorded output of the original run: svm_score:0.7484232655921513
  • 計算AUC值
# Compute the AUC of the logistic regression and SVM models.

# Binarize the test labels (classes 0 and 1) into a matrix; it is flattened
# with ravel() before being passed to roc_curve below.
y_test_hot = label_binarize(y_test, classes=(0, 1))

# decision_function returns per-sample confidence scores (not loss values);
# they serve as the ranking scores for the ROC curve.
lr_y_score = lr.decision_function(X_test)

svm_y_score = svm.decision_function(X_test)

# ROC curve points for each model; the third return value holds the
# decision thresholds at which each (fpr, tpr) pair was computed.
lr_fpr, lr_tpr, lr_threasholds = metrics.roc_curve(y_test_hot.ravel(), lr_y_score.ravel())

svm_fpr, svm_tpr, svm_threasholds = metrics.roc_curve(y_test_hot.ravel(), svm_y_score.ravel())

lr_auc = metrics.auc(lr_fpr, lr_tpr)
# Recorded output of the original run: lr_auc:0.5674626772245001

# Fixed: the SVM AUC must be computed from the SVM ROC curve. The original
# code passed lr_fpr/lr_tpr here, so the value recorded for svm_auc
# (0.5674626772245001) was just a copy of the logistic regression AUC and
# is stale; re-run to obtain the real SVM AUC.
svm_auc = metrics.auc(svm_fpr, svm_tpr)