機器學習之迴歸決策樹DecisionTreeRegressor
阿新 • 發佈:2018-11-29
- 機器學習之迴歸決策樹DecisionTreeRegressor
# -*- coding: utf-8 -*-
"""
Demonstration of regression with sklearn's DecisionTreeRegressor.

Builds a noisy sine-wave data set, fits regression trees, and plots
predictions, the effect of the ``splitter`` strategy, and the effect
of ``max_depth`` on train/test scores.

Created on Fri Nov 23 20:00:23 2018
@author: muli
"""
import numpy as np
from sklearn.tree import DecisionTreeRegressor
# NOTE: sklearn.cross_validation was deprecated in 0.18 and removed in
# scikit-learn 0.20; train_test_split now lives in model_selection.
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


def creat_data(n):
    """
    Generate a data set for a regression problem.

    :param n: number of samples
    :return: a 4-tuple (X_train, X_test, y_train, y_test) — a 75/25
             train/test split of a noisy sine curve
    """
    np.random.seed(0)
    # n x 1 matrix of uniform random values in [0, 5)
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # Add noise to every 5th sample. Using len(y[::5]) instead of
    # int(n / 5) is correct even when n is not divisible by 5
    # (y[::5] has ceil(n/5) elements, so int(n/5) would break the
    # broadcast below for e.g. n == 101).
    noise_num = len(y[::5])
    y[::5] += 3 * (0.5 - np.random.rand(noise_num))
    # Split the data set: the test set is 1/4 of the original data.
    return train_test_split(X, y, test_size=0.25, random_state=1)


def test_DecisionTreeRegressor(*data):
    """
    Exercise DecisionTreeRegressor with default parameters and plot
    the fitted curve against the train/test samples.

    :param data: a tuple (X_train, X_test, y_train, y_test)
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    print("Training score:%f" % (regr.score(X_train, y_train)))
    print("Testing score:%f" % (regr.score(X_test, y_test)))
    # Plot the tree's prediction over a dense grid of the input range.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    X = np.arange(0.0, 5.0, 0.05)[:, np.newaxis]
    Y = regr.predict(X)
    ax.scatter(X_train, y_train, label="train sample", c='g')
    ax.scatter(X_test, y_test, label="test sample", c='r')
    ax.plot(X, Y, label="predict_value", linewidth=2, alpha=0.5)
    ax.set_xlabel("data")
    ax.set_ylabel("target")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()


def test_DecisionTreeRegressor_splitter(*data):
    """
    Compare the 'best' and 'random' split strategies.

    :param data: a tuple (X_train, X_test, y_train, y_test)
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    splitters = ['best', 'random']
    for splitter in splitters:
        regr = DecisionTreeRegressor(splitter=splitter)
        regr.fit(X_train, y_train)
        print("Splitter %s" % splitter)
        print("Training score:%f" % (regr.score(X_train, y_train)))
        print("Testing score:%f" % (regr.score(X_test, y_test)))
        print("----------------------")


def test_DecisionTreeRegressor_depth(*data, maxdepth):
    """
    Plot how predictive performance varies with max_depth.

    :param data: a tuple (X_train, X_test, y_train, y_test)
    :param maxdepth: integer upper bound (exclusive) for the depths
                     tried as DecisionTreeRegressor's max_depth
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        regr = DecisionTreeRegressor(max_depth=depth)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    # Plot train vs. test score as a function of depth (shows the
    # overfitting that sets in as the tree deepens).
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # Label typo fixed: was "traing score".
    ax.plot(depths, training_scores, label="training score")
    ax.plot(depths, testing_scores, label="testing score")
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()


if __name__ == '__main__':
    # Generate the regression data set.
    X_train, X_test, y_train, y_test = creat_data(100)
    # test_DecisionTreeRegressor(X_train, X_test, y_train, y_test)
    # test_DecisionTreeRegressor_splitter(X_train, X_test, y_train, y_test)
    test_DecisionTreeRegressor_depth(X_train, X_test, y_train, y_test, maxdepth=20)