TensorFlow學習筆記二Titanic題目實戰
阿新 • • 發佈:2019-01-03
1. data.info()可以檢視資料的基本status
2. DataFrame.apply(func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds)
引數:
(1)func:應用於每個列/行的函式
(2)axis: {0 or ‘index’, 1 or ‘columns’}, 預設是0。
- 0 or ‘index’: 將func應用於每列
- 1 or ‘columns’:將func應用於每行
3. tf.placeholder(tf.float32,shape=[None,6])宣告輸入資料的佔位符,shape是輸入佔位符的形狀,None表示該維度可以為任意值。
4. tf.Variable(tf.random_normal([6,2]),name='weights')宣告/初始化變數
5. tf.add_to_collection('vars', W),將tensor變數W加入名為'vars'的容器(collection)中,之後可用tf.get_collection('vars')取回。
6. y_pred = tf.nn.softmax(tf.add(tf.matmul(X,W),b)) 建立計算節點,其中tf.matmul是矩陣相乘,tf.add是矩陣相加,
tf.nn.softmax是呼叫softmax函式。
7. tf.reduce_sum(), tf.reduce_mean() 分別用於求和與求平均數。
8. train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
其中tf.train.GradientDescentOptimizer表示訓練過程使用梯度下降法進行最優化,0.001是學習率,minimize(cost)表示以最小化cost作為優化目標。
9. tf.global_variables_initializer() 用於初始化所有的變數
# Titanic hands-on exercise: softmax (logistic) regression in TensorFlow 1.x.
#
# The script trains a 6-feature / 2-class softmax classifier on
# 'titanic_train.csv', saves the learned variables to a checkpoint, restores
# them in a second session, and finally writes predictions for
# 'titanic_test.csv' to a submission CSV.
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Columns used as model inputs (same order for training and test data).
FEATURE_COLUMNS = ['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']
# Directory / path prefix used by the Saver for checkpoint files.
CHECKPOINT_DIR = './modelVar'
CHECKPOINT_PATH = os.path.join(CHECKPOINT_DIR, 'model.ckpt')

data = pd.read_csv('titanic_train.csv')
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly instead of being wrapped in print().
data.info()

# Keep a subset of the features for classification and fill every missing
# value with 0. 'Sex' is encoded as 1 for 'male' and 0 otherwise.
data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)
data = data.fillna(0)
# .values replaces DataFrame.as_matrix(), which newer pandas no longer has.
dataset_X = data[FEATURE_COLUMNS].values

# Two classes: survived and deceased. 'Survived' is the label of one class;
# 'Deceased' is added as the label of the other one (logical NOT of
# 'Survived'), giving a one-hot [Deceased, Survived] target.
data['Deceased'] = data['Survived'].apply(lambda s: int(not s))
dataset_Y = data[['Deceased', 'Survived']].values

X_train, X_test, y_train, y_test = train_test_split(
    dataset_X, dataset_Y, test_size=0.2, random_state=42)

# ---- Build the computation graph ----
# Input placeholders. The first shape element is None so that any number of
# records can be fed at once.
X = tf.placeholder(tf.float32, shape=[None, 6])
y = tf.placeholder(tf.float32, shape=[None, 2])

# Trainable variables.
W = tf.Variable(tf.random_normal([6, 2]), name='weights')
b = tf.Variable(tf.zeros([2]), name='bias')

# Register W and b in the 'vars' collection so they can be fetched later
# through tf.get_collection('vars').
tf.add_to_collection('vars', W)
tf.add_to_collection('vars', b)

# Logistic-regression formula.
y_pred = tf.nn.softmax(tf.add(tf.matmul(X, W), b))

# Cost function: cross entropy (1e-10 keeps log() away from zero).
cross_entroy = -tf.reduce_sum(y * tf.log(y_pred + 1e-10))
cost = tf.reduce_mean(cross_entroy)

# Optimizer: gradient descent with learning rate 0.001.
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)

# The Saver is created here, so it covers the variables declared above.
saver = tf.train.Saver()

# ---- Training ----
with tf.Session() as sess:
    # All variables must be initialized before anything else runs.
    tf.global_variables_initializer().run()

    # Train for 10 epochs, one record per step.
    for epoch in range(10):
        total_loss = 0
        for x_row, y_row in zip(X_train, y_train):
            feed = {X: [x_row], y: [y_row]}
            # session.run triggers the actual execution of the graph.
            _, loss = sess.run([train_op, cost], feed_dict=feed)
            total_loss += loss
        print('Epoch: %04d, total loss=%.9f' % (epoch + 1, total_loss))
    print('Training complete!')

    # Measure accuracy on the held-out split; the original evaluated on the
    # training data while reporting it as the validation set.
    pred = sess.run(y_pred, feed_dict={X: X_test})
    correct = np.equal(np.argmax(pred, 1), np.argmax(y_test, 1))
    accuracy = np.mean(correct.astype(np.float32))
    print('Accuracy on validation set: %.9f' % accuracy)

    # Save the variables. The restore step below depends on this checkpoint,
    # so the save must actually run; the target directory is created first
    # because the Saver expects the parent directory to exist.
    if not os.path.isdir(CHECKPOINT_DIR):
        os.makedirs(CHECKPOINT_DIR)
    saver.save(sess, CHECKPOINT_PATH)
    # After saving, CHECKPOINT_DIR contains model.ckpt.meta,
    # model.ckpt.data-*, model.ckpt.index and a 'checkpoint' file; together
    # they are enough to restore the saved variables.

# ---- Restore the trained parameters and rebuild the prediction graph ----
W = tf.Variable(tf.random_normal([6, 2]), name='weights')
b = tf.Variable(tf.zeros([2]), name='bias')
X = tf.placeholder(tf.float32, shape=[None, 6])
yHat = tf.nn.softmax(tf.add(tf.matmul(X, W), b))

with tf.Session() as sess1:
    tf.global_variables_initializer().run()
    model_file = tf.train.latest_checkpoint(CHECKPOINT_DIR)
    if model_file is None:
        raise IOError('No checkpoint found in %s' % CHECKPOINT_DIR)
    saver.restore(sess1, model_file)
    # Evaluate every variable registered in the 'vars' collection.
    all_vars = tf.get_collection('vars')
    Weights = [v.eval() for v in all_vars]
    # W and b are appended to the collection in that order, so the last two
    # entries are the most recently registered weights and bias. The
    # original hard-coded indices 14/15 only exist when the collection has
    # accumulated entries from repeated runs in the same graph.
    Weights9 = Weights[-2]
    bias9 = Weights[-1]
    y_Hat = sess1.run(yHat, feed_dict={X: X_test, W: Weights9, b: bias9})

# ---- Predict on the test data ----
testdata = pd.read_csv('titanic_test.csv')
testdata = testdata.fillna(0)
testdata['Sex'] = testdata['Sex'].apply(lambda s: 1 if s == 'male' else 0)
XTest = testdata[FEATURE_COLUMNS].values

# Open a session for prediction; the restored values are fed in directly for
# W and b.
with tf.Session() as sess2:
    tf.global_variables_initializer().run()
    predictions = np.argmax(
        sess2.run(yHat, feed_dict={X: XTest, W: Weights9, b: bias9}), 1)

# Build the submission table and store it as a CSV file.
submission = pd.DataFrame({'PassengerId': testdata['PassengerId'],
                           'Survived': predictions})
submission.to_csv('mySubmission201712.csv', index=False)