吳裕雄 python 機器學習——集成學習AdaBoost算法分類模型
阿新 • • 發佈:2019-05-02
繪圖 tor git bubuko boost 算法 select enumerate tar
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split


def load_data_classification():
    """Load the scikit-learn digits data set and split it for classification.

    Returns:
        The result of ``train_test_split`` unpacked by callers as
        ``X_train, X_test, y_train, y_test``.
    """
    digits = datasets.load_digits()
    # Stratified split on the labels; the test set is 1/4 of the data and the
    # seed is fixed so every run sees the same partition.
    return train_test_split(
        digits.data,
        digits.target,
        test_size=0.25,
        random_state=0,
        stratify=digits.target,
    )


# AdaBoost ensemble classification model
def test_AdaBoostClassifier(*data):
    """Plot AdaBoostClassifier accuracy against the number of base estimators.

    Fits one classifier with ``learning_rate=0.1`` and draws the staged
    training and testing scores in a single figure, then shows it.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If ``data`` does not contain exactly four items
            (raised by the tuple unpacking).
    """
    X_train, X_test, y_train, y_test = data

    clf = ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train, y_train)

    # Plotting
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # One x position per fitted estimator, matching what staged_score yields.
    rounds = list(range(1, len(clf.estimators_) + 1))
    ax.plot(rounds, list(clf.staged_score(X_train, y_train)), label="Traing score")
    ax.plot(rounds, list(clf.staged_score(X_test, y_test)), label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostClassifier")
    plt.show()


# Load the classification data
X_train, X_test, y_train, y_test = load_data_classification()
# Run test_AdaBoostClassifier
test_AdaBoostClassifier(X_train, X_test, y_train, y_test)
def test_AdaBoostClassifier_base_classifier(*data):
    """Compare AdaBoostClassifier accuracy for two kinds of base estimator.

    Draws two stacked subplots of staged training/testing score versus the
    number of estimators: the first with the library's default base
    estimator, the second with ``GaussianNB``. Both use ``learning_rate=0.1``.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If ``data`` does not contain exactly four items
            (raised by the tuple unpacking).
    """
    from sklearn.naive_bayes import GaussianNB

    X_train, X_test, y_train, y_test = data

    # (subplot title, base estimator). None keeps the library default, which
    # is what omitting the argument does.
    variants = [
        ("AdaBoostClassifier with Decision Tree", None),
        ("AdaBoostClassifier with Gaussian Naive Bayes", GaussianNB()),
    ]

    fig = plt.figure()
    for row, (title, base) in enumerate(variants, start=1):
        ax = fig.add_subplot(2, 1, row)
        # The base estimator is passed positionally: the keyword was renamed
        # from ``base_estimator`` to ``estimator`` across scikit-learn
        # releases, but it is the first parameter under either name.
        clf = ensemble.AdaBoostClassifier(base, learning_rate=0.1)
        clf.fit(X_train, y_train)

        # Plotting
        rounds = list(range(1, len(clf.estimators_) + 1))
        ax.plot(rounds, list(clf.staged_score(X_train, y_train)), label="Traing score")
        ax.plot(rounds, list(clf.staged_score(X_test, y_test)), label="Testing score")
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        # Fixed y range so the two subplots are directly comparable.
        ax.set_ylim(0, 1)
        ax.set_title(title)
    plt.show()


# Run test_AdaBoostClassifier_base_classifier
test_AdaBoostClassifier_base_classifier(X_train, X_test, y_train, y_test)
def test_AdaBoostClassifier_learning_rate(*data):
    """Plot AdaBoostClassifier accuracy as a function of the learning rate.

    For each learning rate in ``np.linspace(0.01, 1)`` a classifier with
    ``n_estimators=500`` is fitted and its final training and testing scores
    are recorded; both curves are drawn in one figure.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If ``data`` does not contain exactly four items
            (raised by the tuple unpacking).
    """
    X_train, X_test, y_train, y_test = data
    learning_rates = np.linspace(0.01, 1)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    traing_scores = []
    testing_scores = []
    for learning_rate in learning_rates:
        clf = ensemble.AdaBoostClassifier(learning_rate=learning_rate, n_estimators=500)
        clf.fit(X_train, y_train)
        traing_scores.append(clf.score(X_train, y_train))
        testing_scores.append(clf.score(X_test, y_test))

    ax.plot(learning_rates, traing_scores, label="Traing score")
    ax.plot(learning_rates, testing_scores, label="Testing score")
    ax.set_xlabel("learning rate")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostClassifier")
    plt.show()


# Run test_AdaBoostClassifier_learning_rate
test_AdaBoostClassifier_learning_rate(X_train, X_test, y_train, y_test)
def test_AdaBoostClassifier_algorithm(*data):
    """Compare AdaBoostClassifier algorithms across several learning rates.

    Draws a 2x2 grid, one subplot per learning rate in
    ``[0.05, 0.1, 0.5, 0.9]``. Each subplot shows the staged training and
    testing scores for both the ``'SAMME.R'`` and ``'SAMME'`` algorithms.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If ``data`` does not contain exactly four items
            (raised by the tuple unpacking).
    """
    X_train, X_test, y_train, y_test = data
    # NOTE(review): 'SAMME.R' is reported as deprecated in recent scikit-learn
    # releases -- confirm against the installed version before running.
    algorithms = ['SAMME.R', 'SAMME']
    learning_rates = [0.05, 0.1, 0.5, 0.9]

    fig = plt.figure()
    for position, learning_rate in enumerate(learning_rates, start=1):
        ax = fig.add_subplot(2, 2, position)
        # Iterate over the algorithm names directly so the subplot index is
        # not shadowed by a second loop counter.
        for algorithm in algorithms:
            clf = ensemble.AdaBoostClassifier(learning_rate=learning_rate, algorithm=algorithm)
            clf.fit(X_train, y_train)

            # Plotting
            rounds = list(range(1, len(clf.estimators_) + 1))
            ax.plot(rounds, list(clf.staged_score(X_train, y_train)), label="%s:Traing score" % algorithm)
            ax.plot(rounds, list(clf.staged_score(X_test, y_test)), label="%s:Testing score" % algorithm)
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_title("learing rate:%f" % learning_rate)
    fig.suptitle("AdaBoostClassifier")
    plt.show()


# Run test_AdaBoostClassifier_algorithm
test_AdaBoostClassifier_algorithm(X_train, X_test, y_train, y_test)
吳裕雄 python 機器學習——集成學習AdaBoost算法分類模型