1. 程式人生 > >機器學習--資料判斷依據 精確度、召回率、調和平均值F1值

機器學習--資料判斷依據 精確度、召回率、調和平均值F1值

精準度(precision)

precision = 正確預測為正類的個數(TP) / 被預測為正類的總個數(TP+FP)

召回率(recall)

recall = 正確預測為正類的個數(TP) / 實際為正類的總個數(TP+FN)

調和平均值 F1-Score

f1 = 2 * 精準度 * 召回率 / (精準度 + 召回率)

以二分類問題為例

真實\預測 0 1
0 預測negative正確(TN) 預測positive錯誤(FP)
1 預測negative錯誤(FN) 預測positive正確(TP)

precision = TP/(TP+FP)
recall = TP/(TP+FN)
求f1_score

from sklearn.metrics import f1_score
f1_score(y_test,y_predict)
# y_test: ground-truth labels of the test set
# y_predict: labels predicted by the model

程式碼實現上述定義

import numpy as np
from sklearn import datasets

digits = datasets.load_digits()
X = digits['data']
y = digits['target'].copy()

# Deliberately skew the classes: digit 9 becomes the (rare) positive class,
# every other digit the negative class -- roughly a 1:9 imbalance, which is
# exactly the setting where plain accuracy is a misleading metric.
y[digits['target'] == 9] = 1
y[digits['target'] != 9] = 0

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)  # plain accuracy; see precision/recall below
y_log_predict = log_reg.predict(X_test)


def TN(y_true, y_predict):
    """True negatives: actual 0 predicted as 0."""
    return np.sum((y_true == 0) & (y_predict == 0))

tn = TN(y_test, y_log_predict)  # tn = 399


def FP(y_true, y_predict):
    """False positives: actual 0 predicted as 1."""
    return np.sum((y_true == 0) & (y_predict == 1))

fp = FP(y_test, y_log_predict)  # fp = 3


def FN(y_true, y_predict):
    """False negatives: actual 1 predicted as 0."""
    return np.sum((y_true == 1) & (y_predict == 0))

fn = FN(y_test, y_log_predict)  # fn = 7


def TP(y_true, y_predict):
    """True positives: actual 1 predicted as 1."""
    return np.sum((y_true == 1) & (y_predict == 1))

tp = TP(y_test, y_log_predict)  # tp = 41

構造混淆矩陣

def confusion_matrix(y_true, y_predict):
    """Assemble the 2x2 confusion matrix [[TN, FP], [FN, TP]]."""
    negatives = [TN(y_true, y_predict), FP(y_true, y_predict)]
    positives = [FN(y_true, y_predict), TP(y_true, y_predict)]
    return np.array([negatives, positives])
confusion_matrix(y_test,y_log_predict)
"""
output :
array([[399,   3],
       [  7,  41]])
"""

精準率

def precision_score(y_true, y_predict):
    """Precision = TP / (TP + FP): of all samples predicted positive,
    the fraction that truly are positive. Returns 0.0 when the model
    predicted no positives at all."""
    # Cast to plain int: np.sum returns numpy scalars, whose division by
    # zero yields nan with a warning instead of raising, so the original
    # guard could never fire.
    tp = int(TP(y_true, y_predict))
    fp = int(FP(y_true, y_predict))
    try:
        return tp / (tp + fp)
    except ZeroDivisionError:  # narrow except: the bare `except:` hid real bugs
        return 0.0
precision_score(y_test,y_log_predict)
"""
output : 0.9318181818181818
"""

召回率

def recall_score(y_true, y_predict):
    """Recall = TP / (TP + FN): of all truly positive samples, the
    fraction the model found. Returns 0.0 when there are no actual
    positives."""
    # Cast to plain int: np.sum returns numpy scalars, whose division by
    # zero yields nan with a warning instead of raising, so the original
    # guard could never fire.
    tp = int(TP(y_true, y_predict))
    fn = int(FN(y_true, y_predict))
    try:
        return tp / (tp + fn)
    except ZeroDivisionError:  # narrow except: the bare `except:` hid real bugs
        return 0.0
recall_score(y_test,y_log_predict)
"""
output : 0.8541666666666666
"""

以上為拆分理解 在sklearn中都可以直接求得

# Confusion matrix (sklearn built-in; shadows the hand-written version above)
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_log_predict)
"""
output:array([[399,   3],
       [  7,  41]], dtype=int64)
"""
# Precision
from sklearn.metrics import precision_score
precision_score(y_test,y_log_predict)
"""
output: 0.9318181818181818
"""
# Recall
from sklearn.metrics import recall_score
recall_score(y_test,y_log_predict)
"""
output: 0.8541666666666666
"""
# classification_report: per-class precision/recall/f1-score summary table
from sklearn.metrics import classification_report
print(classification_report(y_test,y_log_predict))
"""
output:              precision    recall  f1-score   support

          			0       0.98      0.99      0.99       402
          			1       0.93      0.85      0.89        48

avg / total       			0.98      0.98      0.98       450
"""