神經網路貸款風險評估(based on Keras and Python)
阿新 • • 發佈:2019-02-13
用我兒子的話說,有一天啊,小烏龜遇見小兔子………
有一天,我在網上看到這樣一篇文章,用決策樹做貸款決策分析。
# Decision-tree baseline for loan-risk classification.
# Reads loans.csv, label-encodes the categorical features, fits an
# 8-level-deep decision tree, and prints held-out test accuracy.
import pandas as pd
df = pd.read_csv('loans.csv')

# Features are every column except the binary target 'safe_loans'.
X = df.drop('safe_loans', axis=1)
y = df.safe_loans

# Encode categorical columns to integers, keeping one LabelEncoder per
# column (keyed by column name) so each mapping can be inverted later.
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
d = defaultdict(LabelEncoder)
X_trans = X.apply(lambda x: d[x.name].fit_transform(x))
X_trans.head()

# Random train/test split. sklearn.cross_validation was removed in
# scikit-learn 0.20; model_selection is the supported module.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_trans, y, random_state=1)

# Fit the decision tree.
from sklearn import tree
clf = tree.DecisionTreeClassifier(max_depth=8)
clf = clf.fit(X_train, y_train)

# Sanity-check a single record: predicted label vs. actual label.
# iloc[[1]] keeps the 2-D (1, n_features) shape the estimator expects,
# instead of wrapping a 1-D Series in a list.
test_rec = X_test.iloc[[1]]
clf.predict(test_rec)
y_test.iloc[1]

# Overall accuracy on the held-out test set.
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, clf.predict(X_test)))
這篇文章寫的非常好,從中學到好多,但是計算的正確率不太高,8層的決策樹正確率才能達到0.645
0.645480347467
我用神經網路重新做了計算
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Neural-network loan-risk classifier (Keras).

Reads loans.csv, label-encodes the categorical features, min-max scales
them, trains a small fully-connected network with a sigmoid output, and
prints held-out test accuracy.

Created on Thu Aug 17 21:14:08 2017
@author: luogan
"""
# ---- read data ----
import pandas as pd
df = pd.read_csv('loans.csv')

# Features are every column except the binary target 'safe_loans'.
X = df.drop(['safe_loans'], axis=1)
y = df.safe_loans

# ---- encode categorical columns ----
# One LabelEncoder per column (keyed by column name) so each mapping can
# be inverted later.
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
d = defaultdict(LabelEncoder)
X_trans = X.apply(lambda x: d[x.name].fit_transform(x))
X_trans.head()

# ---- scale features ----
data_train = X_trans
data_max = data_train.max()
data_min = data_train.min()
data_mean = data_train.mean()
# NOTE(review): (x - max)/(max - min) maps every column to [-1, 0] rather
# than the conventional [0, 1] of (x - min)/(max - min). Kept unchanged to
# reproduce the published result — confirm this was intentional.
X_train1 = (data_train - data_max) / (data_max - data_min)

# Map labels from {-1, +1} to {0, 1} for the sigmoid output unit.
y = 0.5 * (y + 1)

# ---- random train/test split ----
# sklearn.cross_validation was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X_train1, y, random_state=1)

# ---- build the network ----
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

model = Sequential()
# Keras 2 API: `units` replaces the removed `output_dim` keyword, and
# `input_dim` is only required on the first layer — subsequent layers
# infer their input shape.
model.add(Dense(units=48, input_dim=12))   # input layer -> hidden layer
model.add(Activation('tanh'))
model.add(Dense(units=48))                 # hidden -> hidden
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(units=36))                 # hidden -> hidden
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(units=36))                 # hidden -> hidden
model.add(Activation('relu'))
model.add(Dense(units=12))                 # hidden -> hidden
model.add(Activation('relu'))
model.add(Dense(units=12))                 # hidden -> hidden
model.add(Activation('relu'))
model.add(Dense(units=1))                  # hidden -> output
model.add(Activation('sigmoid'))           # probability of a safe loan

# Compile with binary cross-entropy and the Adam optimizer. The original
# comment promised binary_crossentropy but the code compiled with
# mean_squared_error; cross-entropy is the correct objective for a
# sigmoid binary classifier.
model.compile(loss='binary_crossentropy', optimizer='adam')

# Train the model (Keras 2 renamed `nb_epoch` to `epochs`).
model.fit(x_train.values, y_train.values, epochs=70, batch_size=2000)

# Predict class labels. Sequential.predict_classes was removed from
# modern Keras, so threshold the sigmoid probabilities at 0.5 instead.
r = pd.DataFrame((model.predict(x_test.values) > 0.5).astype(int))

from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, r))
0.650640749978
我對神經網路進行了各種優化,正確率一直上不去,計算結果還不如我的神經網路股票預測程式碼
我不高興,非常不高興,65%的正確率是我無法容忍的,讓我鬱悶的是,我一直深愛的神經網路居然也這麼無力
不能這樣下去,下回我們將採用傳說中的xgboost試一下
提取碼 jc5x