1. 程式人生 > 西瓜書 課後習題3.4 十折交叉和留一法,對率迴歸

西瓜書 課後習題3.4 十折交叉和留一法,對率迴歸

import csv
import numpy as np


def readData(filename):
    """
    Load the iris CSV file and split its samples into three groups by label.

    Every non-empty row must hold four numeric features followed by the class
    label in column 4.  A constant 1.0 is appended to each feature vector
    ([x; 1]) so the bias can be learned as part of the weight vector.

    :param filename: path to the CSV data file
    :return: X1, X2, X3, y1, y2, y3
            X1: feature lists [f0, f1, f2, f3, 1.0] of rows labelled 'Iris-setosa'
            X2: same, for rows labelled 'Iris-versicolor'
            X3: same, for rows with any other label
            y1, y2, y3: the label strings matching X1, X2, X3
    :raises IndexError: if a non-empty row has fewer than five columns
    :raises ValueError: if a feature column cannot be parsed as a float
    """
    X1, X2, X3 = [], [], []
    y1, y2, y3 = [], [], []
    # Label -> (feature bucket, label bucket); unknown labels fall through to
    # the third bucket.
    buckets = {
        'Iris-setosa': (X1, y1),
        'Iris-versicolor': (X2, y2),
    }
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='') as f:
        for line in csv.reader(f):
            # csv.reader yields [] for a blank line (e.g. a trailing empty
            # line at the end of the file); indexing it would raise.
            if not line:
                continue
            label = line[4]
            features, labels = buckets.get(label, (X3, y3))
            features.append([float(value) for value in line[:4]] + [1.0])
            labels.append(label)
    return X1, X2, X3, y1, y2, y3


def tenfolddata(X1, X2):
    """
    Build ten cross-validation folds from a positive and a negative class.

    Fold i takes samples [5*i, 5*i + 5) of each class, positives first.  With
    50 samples per class every fold holds 5 positives and 5 negatives.

    :param X1: sequence of positive samples (shape [50, 5] for full folds)
    :param X2: sequence of negative samples (shape [50, 5] for full folds)
    :return: folds: list of 10 folds, each a list of samples
             y: list of 10 label lists (1 for X1 samples, 0 for X2 samples),
                each exactly as long as the matching fold
    """
    n_folds, per_class = 10, 5
    folds = []
    y = []
    for i in range(n_folds):
        start, stop = i * per_class, (i + 1) * per_class
        positives = list(X1[start:stop])
        negatives = list(X2[start:stop])
        folds.append(positives + negatives)
        # Size the labels from the slices actually taken, so that a class
        # with fewer than 50 samples cannot leave labels and samples out of
        # step.
        y.append([1] * len(positives) + [0] * len(negatives))
    return folds, y


def LR(X, y, lr=0.01, n_iter=150):
    """
    Train a logistic regression model with batch gradient descent.

    :param X: np.array with shape [N, d], inputs; the bias is learned through
              a constant-1 column that the caller includes in X
    :param y: np.array with shape [N, 1], labels (1 = positive, 0 = negative)
    :param lr: learning rate; the result is very sensitive to this value
    :param n_iter: number of gradient descent steps
    :return: beta, np.array with shape [1, d] (bias weight included)
    """
    _, d = X.shape
    beta = np.ones((1, d)) * 0.1

    for _ in range(n_iter):
        z = X.dot(beta.T)  # shape [N, 1]
        # Numerically stable sigmoid: exp is only ever taken of a
        # non-positive number, so a large |z| cannot overflow into
        # inf / inf = nan as exp(z) / (1 + exp(z)) does.
        e = np.exp(-np.abs(z))
        p1 = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))  # shape [N, 1]
        # Gradient of the negative log-likelihood with respect to beta.
        first_order = -np.sum(X * (y - p1), 0, keepdims=True)  # shape [1, d]

        beta -= first_order * lr
    return beta


def testing(beta, X, y):
    """
    基於邏輯迴歸進行分類任務測試
    :param beta: np.array with shape[1,d], 邏輯迴歸引數
    :param X: np.array wiht shape[N,d], testing instances
    :param y: np.array with shape[N,1], testing labels
    :return: error_num, LR演算法分類錯誤個數
    """
    predicts = (X.dot(beta.T) >= 0)  # shape[N,1]
    error_num = np.sum(predicts != y)
    return error_num


def tenFoldCrossValidation(folds, y):
    """
    Cross-validate the logistic regression model over the given folds.

    Each fold is held out once for validation while the model is trained on
    all the others.  The number of folds and the feature width are taken from
    the data rather than fixed at 10 and 5.

    :param folds: list of folds, each a list of samples (e.g. shape [10, 10, 5])
    :param y: list of label lists, one per fold (e.g. shape [10, 10])
    :return: ten_fold_error_nums, misclassifications summed over all folds
    :raises ValueError: if fewer than two folds are given
    """
    n_folds = len(folds)
    if n_folds < 2:
        raise ValueError("cross-validation needs at least two folds")

    # Convert once up front; each fold becomes [n_i, d] and its labels [n_i, 1].
    fold_X = [np.asarray(fold) for fold in folds]
    fold_y = [np.asarray(labels).reshape(-1, 1) for labels in y]

    ten_fold_error_nums = 0
    for i in range(n_folds):
        # Stack every fold except the i-th along the sample axis.  Unlike a
        # reshape to a fixed width, this cannot silently scramble rows when
        # the feature count differs.
        train_X = np.concatenate(fold_X[:i] + fold_X[i + 1:], axis=0)
        train_y = np.concatenate(fold_y[:i] + fold_y[i + 1:], axis=0)

        beta = LR(train_X, train_y)
        ten_fold_error_nums += testing(beta, fold_X[i], fold_y[i])
    return ten_fold_error_nums


def Loo(X, y):
    """
    Leave-one-out evaluation of the logistic regression model.

    Every sample is held out once for validation while the model is trained on
    all the remaining ones.  The sample count and feature width are taken from
    the data rather than fixed at 100 and 5.

    :param X: samples with shape [N, d] (list of lists or np.array)
    :param y: labels with shape [N]
    :return: loo_error_nums, number of held-out samples that were misclassified
    """
    samples = np.asarray(X)
    labels = np.asarray(y).reshape(-1, 1)

    loo_error_nums = 0
    for i in range(len(samples)):
        # Training set: everything except row i.
        train_X = np.delete(samples, i, axis=0)
        train_y = np.delete(labels, i, axis=0)
        # Slicing (not indexing) keeps the held-out sample two-dimensional.
        val_X = samples[i:i + 1]
        val_y = labels[i:i + 1]

        beta = LR(train_X, train_y)
        loo_error_nums += testing(beta, val_X, val_y)
    return loo_error_nums


if __name__ == '__main__':
    import sys

    # The data file may be given as the first command-line argument; without
    # one, the script falls back to its original hard-coded location.
    default_dataset = 'C:\\Users\\14399\\Desktop\\iris.csv'
    dataset = sys.argv[1] if len(sys.argv) > 1 else default_dataset
    X1, X2, X3, y1, y2, y3 = readData(dataset)

    # Each pair is (positive class, negative class), evaluated in this order:
    # X1 vs X2, X1 vs X3, X2 vs X3.
    class_pairs = [(X1, X2), (X1, X3), (X2, X3)]

    # Ten-fold cross-validation: print the total error count for each pair.
    for positives, negatives in class_pairs:
        folds, y = tenfolddata(positives, negatives)
        print(tenFoldCrossValidation(folds, y))

    # Leave-one-out: print the total error count for each pair.
    for positives, negatives in class_pairs:
        X = positives + negatives
        y = [1] * len(positives) + [0] * len(negatives)
        print(Loo(X, y))

結果(分類錯誤個數,依次為 X1/X2、X1/X3、X2/X3):
    未對X進行拓展時——十折交叉: 0  0  15;留一法: 0  0  11
    對X進行拓展([x;1])後的結果更好了——十折交叉: 0  0  3;留一法: 0  0  4

 

資料集:UCI  iris資料集  

連結:https://pan.baidu.com/s/1CWMvPZdsYsKYncJsl0P5bQ  提取碼:lx4r

參考:https://blog.csdn.net/VictoriaW/article/details/77989486