
吳裕雄 Python Machine Learning: Model Selection and Dataset Splitting


import numpy as np
from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut,cross_val_score

# Model selection / dataset splitting: train_test_split
def test_train_test_split():
    X=[[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34],
       [41,42,43,44],
       [51,52,53,54],
       [61,62,63,64],
       [71,72,73,74]]
    y=[1,1,0,0,1,1,0,0]
    # split; the test set is 40% of the original data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    print("X_train=",X_train)
    print("X_test=",X_test)
    print("y_train=",y_train)
    print("y_test=",y_test)
    # stratified split; the test set is 40% of the original data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0, stratify=y)
    print("Stratify:X_train=",X_train)
    print("Stratify:X_test=",X_test)
    print("Stratify:y_train=",y_train)
    print("Stratify:y_test=",y_test)

test_train_test_split()
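
The effect of stratify=y is easiest to see by counting the class labels in each split. The following is a minimal sketch, not part of the original post (the Counter import and toy features are only for illustration), showing that the stratified split keeps the 1:1 class ratio of y in both the training and test sets:

from collections import Counter
from sklearn.model_selection import train_test_split

X = [[i] for i in range(8)]          # toy features, one sample per label below
y = [1, 1, 0, 0, 1, 1, 0, 0]

_, _, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0, stratify=y)
print("train labels:", Counter(y_train))   # both splits keep the 1:1 class ratio
print("test labels:", Counter(y_test))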


# Model selection / dataset splitting: KFold
def test_KFold():
    X=np.array([[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34],
       [41,42,43,44],
       [51,52,53,54],
       [61,62,63,64],
       [71,72,73,74],
       [81,82,83,84]])
    y=np.array([1,1,0,0,1,1,0,0,1])
    # do not shuffle the data before splitting
    # (random_state is omitted here: recent scikit-learn raises an error if it is set while shuffle=False)
    folder=KFold(n_splits=3,shuffle=False)
    for train_index,test_index in folder.split(X,y):
        print("Train Index:",train_index)
        print("Test Index:",test_index)
        print("X_train:",X[train_index])
        print("X_test:",X[test_index])
        print("")
    # shuffle the data before splitting
    shuffle_folder=KFold(n_splits=3,random_state=0,shuffle=True)
    for train_index,test_index in shuffle_folder.split(X,y):
        print("Shuffled Train Index:",train_index)
        print("Shuffled Test Index:",test_index)
        print("Shuffled X_train:",X[train_index])
        print("Shuffled X_test:",X[test_index])
        print("")
        
test_KFold()
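
In practice the index arrays produced by KFold are consumed by training on each training fold and scoring on the held-out fold, which is exactly what cross_val_score automates later in this post. A minimal sketch of that loop, assuming the iris dataset and LogisticRegression purely for illustration (neither appears in the original code):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

X, y = load_iris(return_X_y=True)
folder = KFold(n_splits=5, shuffle=True, random_state=0)
for train_index, test_index in folder.split(X, y):
    # fit on the training fold, evaluate on the held-out fold
    clf = LogisticRegression(max_iter=1000).fit(X[train_index], y[train_index])
    print("fold accuracy:", clf.score(X[test_index], y[test_index]))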


# Model selection / dataset splitting: StratifiedKFold
def test_StratifiedKFold():
    X=np.array([[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34],
       [41,42,43,44],
       [51,52,53,54],
       [61,62,63,64],
       [71,72,73,74]])

    y=np.array([1,1,0,0,1,1,0,0])

    # random_state is omitted when shuffle=False (recent scikit-learn raises an error otherwise)
    folder=KFold(n_splits=4,shuffle=False)
    stratified_folder=StratifiedKFold(n_splits=4,shuffle=False)
    for train_index,test_index in folder.split(X,y):
        print("Train Index:",train_index)
        print("Test Index:",test_index)
        print("y_train:",y[train_index])
        print("y_test:",y[test_index])
        print("")

    for train_index,test_index in stratified_folder.split(X,y):
        print("Stratified Train Index:",train_index)
        print("Stratified Test Index:",test_index)
        print("Stratified y_train:",y[train_index])
        print("Stratified y_test:",y[test_index])
        print("")
        
test_StratifiedKFold()
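
A quick way to confirm what the printed folds above show: every StratifiedKFold test fold keeps the overall class ratio, whereas plain KFold on these ordered labels does not. A minimal check, not in the original post:

import numpy as np
from sklearn.model_selection import StratifiedKFold

y = np.array([1, 1, 0, 0, 1, 1, 0, 0])
X = np.zeros((len(y), 1))            # feature values do not affect the split itself

for _, test_index in StratifiedKFold(n_splits=4).split(X, y):
    # each 2-sample test fold contains one sample of each class
    print("test fold labels:", y[test_index], "counts:", np.bincount(y[test_index]))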


# Model selection / dataset splitting: LeaveOneOut
def test_LeaveOneOut():
    X=np.array([[1,2,3,4],
       [11,12,13,14],
       [21,22,23,24],
       [31,32,33,34]])
    y=np.array([1,1,0,0])
    lo=LeaveOneOut()
    for train_index,test_index in lo.split(X):
        print("Train Index:",train_index)
        print("Test Index:",test_index)
        print("X_train:",X[train_index])
        print("X_test:",X[test_index])
        print("")
        
test_LeaveOneOut()
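
LeaveOneOut is the extreme case of k-fold splitting: each test fold holds exactly one sample, so it behaves like KFold with n_splits equal to the number of samples. A minimal sketch, not in the original post, verifying this on the same four-sample array:

import numpy as np
from sklearn.model_selection import KFold, LeaveOneOut

X = np.array([[1,2,3,4],
              [11,12,13,14],
              [21,22,23,24],
              [31,32,33,34]])
loo_tests = [tuple(test) for _, test in LeaveOneOut().split(X)]
kf_tests = [tuple(test) for _, test in KFold(n_splits=len(X)).split(X)]
print(loo_tests == kf_tests)   # expected: True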


# Model selection / dataset splitting: cross_val_score
def test_cross_val_score():
    from sklearn.datasets import load_digits
    from sklearn.svm import LinearSVC
    digits=load_digits() # load a dataset for a classification problem
    X=digits.data
    y=digits.target
    # use LinearSVC as the classifier, with 10-fold cross-validation
    result=cross_val_score(LinearSVC(),X,y,cv=10)
    print("Cross Val Score is:",result)
    
test_cross_val_score()
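
Besides an integer fold count, cross_val_score also accepts an explicit splitter object for its cv argument, which ties it back to the KFold / StratifiedKFold splitters shown above. A minimal sketch under that assumption (dual=False is added here only to keep the LinearSVC solver stable on this data; it is not in the original call):

from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import LinearSVC

X, y = load_digits(return_X_y=True)
# pass a configured splitter instead of cv=10
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_val_score(LinearSVC(dual=False), X, y, cv=cv, scoring="accuracy")
print("per-fold accuracy:", scores)
print("mean accuracy:", scores.mean())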

