程式人生 > 機器學習——決策樹

機器學習——決策樹

 1 import numpy as np
 2 import pandas as pd
 3 from sklearn.feature_extraction import DictVectorizer
 4 from sklearn.model_selection import train_test_split
 5 from sklearn.tree import DecisionTreeClassifier
 6 from sklearn.metrics import r2_score
 7 
 8 
 9 def titanic_tree():
10     # 獲取資料
11     df = pd.read_csv('
Titanic.csv') 12 # df = df.fillna(0) 13 # dict_train = df.loc[:, ['Pclass', 'Age', 'Sex']].to_dict(orient='record') 14 # dict_target = pd.DataFrame(df['Survived'], columns=['Survived']).to_dict(orient='record') 15 # x_train, x_test, y_train, y_test = train_test_split(dict_train, dict_target, test_size=0.25)
16 17 # 處理資料,找出特徵值和目標值 18 x = df.loc[:, ['Pclass', 'Age', 'Sex']] 19 y = df.loc[:, ['Survived']] 20 # 缺失值處理 21 x['Age'].fillna(x['Age'].mean(), inplace=True) 22 # 分割資料集到訓練集和測試集 23 x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25) 24 # print(y_test)
25 dv_train = DictVectorizer(sparse=False) 26 x_train = dv_train.fit_transform(x_train.to_dict(orient='record')) 27 x_test = dv_train.transform(x_test.to_dict(orient='record')) 28 29 dv_target = DictVectorizer(sparse=False) 30 y_target = dv_target.fit_transform(y_train.to_dict(orient='record')) 31 y_test = dv_target.transform(y_test.to_dict(orient='record')) 32 # print(y_test) 33 # 用決策樹進行預測 34 d_tree = DecisionTreeClassifier() 35 d_tree.fit(x_train, y_train) 36 37 data_predict = { 38 'Pclass': 1, 39 'Age': 38, 40 'Sex': 'female' 41 42 } 43 44 x_data = dv_train.transform(data_predict) 45 print(dv_target.inverse_transform(d_tree.predict(x_data).reshape(-1,1))) 46 # print(d_tree.predict(x_test)) 47 # print(y_test) 48 # 預測準確率 49 # print(d_tree.score(x_test, y_test)) 50 51 52 if __name__ == '__main__': 53 titanic_tree()