
吳裕雄 Python Machine Learning: Model Selection and Dataset Splitting


import numpy as np
from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut,cross_val_score

# Model selection / dataset splitting: train_test_split
def test_train_test_split():
    X=[[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34],
       [41,42,43,44],
       [51,52,53,54],
       [61,62,63,64],
       [71,72,73,74]]
    y=[1,1,0,0,1,1,0,0]
    # split; the test set is 40% of the original data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    print("X_train=",X_train)
    print("X_test=",X_test)
    print("y_train=",y_train)
    print("y_test=",y_test)
    # stratified split; the test set is 40% of the original data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0, stratify=y)
    print("Stratify:X_train=",X_train)
    print("Stratify:X_test=",X_test)
    print("Stratify:y_train=",y_train)
    print("Stratify:y_test=",y_test)

test_train_test_split()
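
The effect of stratify=y is easiest to see by counting the class labels in each split. The following is a minimal sketch, not part of the original post (the Counter import and toy features are only for illustration), showing that the stratified split keeps the 1:1 class ratio of y in both the training and test sets:

from collections import Counter
from sklearn.model_selection import train_test_split

X = [[i] for i in range(8)]          # toy features, one sample per label below
y = [1, 1, 0, 0, 1, 1, 0, 0]

_, _, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0, stratify=y)
print("train labels:", Counter(y_train))   # both splits keep the 1:1 class ratio
print("test labels:", Counter(y_test))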


# Model selection / dataset splitting: KFold
def test_KFold():
    X=np.array([[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34],
       [41,42,43,44],
       [51,52,53,54],
       [61,62,63,64],
       [71,72,73,74],
       [81,82,83,84]])
    y=np.array([1,1,0,0,1,1,0,0,1])
    # do not shuffle the data before splitting
    # (random_state is omitted here: recent scikit-learn raises an error if it is set while shuffle=False)
    folder=KFold(n_splits=3,shuffle=False)
    for train_index,test_index in folder.split(X,y):
        print("Train Index:",train_index)
        print("Test Index:",test_index)
        print("X_train:",X[train_index])
        print("X_test:",X[test_index])
        print("")
    # shuffle the data before splitting
    shuffle_folder=KFold(n_splits=3,random_state=0,shuffle=True)
    for train_index,test_index in shuffle_folder.split(X,y):
        print("Shuffled Train Index:",train_index)
        print("Shuffled Test Index:",test_index)
        print("Shuffled X_train:",X[train_index])
        print("Shuffled X_test:",X[test_index])
        print("")
        
test_KFold()
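
In practice the index arrays produced by KFold are consumed by training on each training fold and scoring on the held-out fold, which is exactly what cross_val_score automates later in this post. A minimal sketch of that loop, assuming the iris dataset and LogisticRegression purely for illustration (neither appears in the original code):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

X, y = load_iris(return_X_y=True)
folder = KFold(n_splits=5, shuffle=True, random_state=0)
for train_index, test_index in folder.split(X, y):
    # fit on the training fold, evaluate on the held-out fold
    clf = LogisticRegression(max_iter=1000).fit(X[train_index], y[train_index])
    print("fold accuracy:", clf.score(X[test_index], y[test_index]))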


# Model selection / dataset splitting: StratifiedKFold
def test_StratifiedKFold():
    X=np.array([[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34],
       [41,42,43,44],
       [51,52,53,54],
       [61,62,63,64],
       [71,72,73,74]])

    y=np.array([1,1,0,0,1,1,0,0])

    # random_state is omitted when shuffle=False (recent scikit-learn raises an error otherwise)
    folder=KFold(n_splits=4,shuffle=False)
    stratified_folder=StratifiedKFold(n_splits=4,shuffle=False)
    for train_index,test_index in folder.split(X,y):
        print("Train Index:",train_index)
        print("Test Index:",test_index)
        print("y_train:",y[train_index])
        print("y_test:",y[test_index])
        print("")

    for train_index,test_index in stratified_folder.split(X,y):
        print("Stratified Train Index:",train_index)
        print("Stratified Test Index:",test_index)
        print("Stratified y_train:",y[train_index])
        print("Stratified y_test:",y[test_index])
        print("")
        
test_StratifiedKFold()
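
A quick way to confirm what the printed folds above show: every StratifiedKFold test fold keeps the overall class ratio, whereas plain KFold on these ordered labels does not. A minimal check, not in the original post:

import numpy as np
from sklearn.model_selection import StratifiedKFold

y = np.array([1, 1, 0, 0, 1, 1, 0, 0])
X = np.zeros((len(y), 1))            # feature values do not affect the split itself

for _, test_index in StratifiedKFold(n_splits=4).split(X, y):
    # each 2-sample test fold contains one sample of each class
    print("test fold labels:", y[test_index], "counts:", np.bincount(y[test_index]))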


# Model selection / dataset splitting: LeaveOneOut
def test_LeaveOneOut():
    X=np.array([[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34]])
    y=np.array([1,1,0,0])
    lo=LeaveOneOut()
    for train_index,test_index in lo.split(X):
        print("Train Index:",train_index)
        print("Test Index:",test_index)
        print("X_train:",X[train_index])
        print("X_test:",X[test_index])
        print("")
        
test_LeaveOneOut()
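
LeaveOneOut is the extreme case of k-fold splitting: each test fold holds exactly one sample, so it behaves like KFold with n_splits equal to the number of samples. A minimal sketch, not in the original post, verifying this on the same four-sample array:

import numpy as np
from sklearn.model_selection import KFold, LeaveOneOut

X = np.array([[1,2,3,4],
              [11,12,13,14],
              [21,22,23,24],
              [31,32,33,34]])
loo_tests = [tuple(test) for _, test in LeaveOneOut().split(X)]
kf_tests = [tuple(test) for _, test in KFold(n_splits=len(X)).split(X)]
print(loo_tests == kf_tests)   # expected: True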


# Model selection / dataset splitting: cross_val_score
def test_cross_val_score():
    from sklearn.datasets import load_digits
    from sklearn.svm import LinearSVC
    digits=load_digits() # load a dataset for a classification problem
    X=digits.data
    y=digits.target
    # use LinearSVC as the classifier, with 10-fold cross-validation
    result=cross_val_score(LinearSVC(),X,y,cv=10)
    print("Cross Val Score is:",result)
    
test_cross_val_score()
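
Besides an integer fold count, cross_val_score also accepts an explicit splitter object for its cv argument, which ties it back to the KFold / StratifiedKFold splitters shown above. A minimal sketch under that assumption (dual=False is added here only to keep the LinearSVC solver stable on this data; it is not in the original call):

from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import LinearSVC

X, y = load_digits(return_X_y=True)
# pass a configured splitter instead of cv=10
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_val_score(LinearSVC(dual=False), X, y, cv=cv, scoring="accuracy")
print("per-fold accuracy:", scores)
print("mean accuracy:", scores.mean())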

