Andrew Ng's deeplearning.ai Deep Learning Course, Programming Assignment — Shallow Network for Dataset Classification (1-3)
## Planar data classification with one hidden layer ##
1. Commonly used Python libraries

numpy: the fundamental package for scientific computing with Python
sklearn: provides simple and efficient tools for data mining and data analysis
matplotlib: a library for plotting graphs in Python

```python
import numpy as np
import sklearn
import sklearn.datasets       # used later to generate the extra datasets
import sklearn.linear_model   # used later for LogisticRegressionCV
import matplotlib.pyplot as plt
import time                   # used later to time the hidden-layer-size experiments
```
2. Dataset

Randomly generate a dataset. The code below generates a "flower"-shaped dataset and visualizes it:
```python
def load_planar_dataset():
    # generate two random arrays X and Y
    np.random.seed(1)
    m = 400                              # number of examples
    N = int(m/2)                         # number of points per class (two classes)
    D = 2                                # dimensionality (2-D data)
    X = np.zeros((m, D))                 # (m, 2) matrix, one row per example
    Y = np.zeros((m, 1), dtype="uint8")  # (m, 1) label vector
    a = 4                                # maximum ray of the flower
    for j in range(2):
        ix = range(N*j, N*(j+1))         # indices in [N*j, N*(j+1))
        t = np.linspace(j*3.12, (j+1)*3.12, N) + np.random.randn(N)*0.2  # theta (3.12 ~ pi)
        r = a*np.sin(4*t) + np.random.randn(N)*0.2                       # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]  # np.c_ stacks the two columns into an (N, 2) block
        Y[ix] = j
    X = X.T  # (2, m)
    Y = Y.T  # (1, m)
    return X, Y
```
Visualize the data:

```python
X, Y = load_planar_dataset()
plt.scatter(X[0, :], X[1, :], c=Y.ravel(), s=40, cmap=plt.cm.Spectral)  # ravel Y so matplotlib accepts it as colors
plt.show()
```
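As a quick sanity check (not part of the original assignment code), the shapes can be inspected before training:

```python
# Hypothetical sanity check: confirm the layouts used throughout this post.
print("X shape: " + str(X.shape))  # expected (2, 400): features x examples
print("Y shape: " + str(Y.shape))  # expected (1, 400): labels as a row vector
print("number of examples m = " + str(X.shape[1]))
```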
3. Simple Logistic Regression
```python
clf = sklearn.linear_model.LogisticRegressionCV()  # logistic regression classifier with built-in CV
clf.fit(X.T, Y.T.ravel())  # fit on X.T (400, 2); sklearn expects a 1-D label vector, so flatten Y.T (400, 1)
```
After training, we plot the decision boundary to see how the data is classified. With plain logistic regression the result is not very good. The plotting helper:
```python
def plot_decision_boundary(model, X, y):
    # Set min and max values and give them some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1  # range of the first feature row
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1  # range of the second feature row
    h = 0.01  # step size of the grid
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid;
    # np.c_[xx.ravel(), yy.ravel()] lists every grid point as a row, so the model sees the whole plane
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)  # reshape back to the grid so the predictions cover the whole plane
    # Plot the filled contour and the training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y.ravel(), cmap=plt.cm.Spectral)
```
Run it:

```python
plot_decision_boundary(lambda x: clf.predict(x), X, Y)
plt.title("Logistic Regression")
plt.show()
```
```python
LR_predictions = clf.predict(X.T)  # note: predictions on the training points X themselves, not on the whole grid
# Y.yhat counts correctly predicted 1s; (1-Y).(1-yhat) counts correctly predicted 0s
print("Accuracy of logistic regression: %d" % float((np.dot(Y, LR_predictions) + np.dot(1-Y, 1-LR_predictions))/float(Y.size)*100) + '%')
```
output:
```
Accuracy of logistic regression: 47%
```
4. Neural Network model

(1) Determine the input and output layer sizes from X and Y, and define the hidden layer size (there is only one hidden layer here):
```python
def layer_sizes(X, Y):
    n_x = X.shape[0]  # input layer size = number of features
    n_h = 4           # hidden layer size, fixed to 4 here
    n_y = Y.shape[0]  # output layer size
    return (n_x, n_h, n_y)
```
---
To test whether the function above is correct, we use a helper that randomly generates X and Y matrices:
```python
def layer_size_test_case():  # test whether layer_sizes(X, Y) is correct
    np.random.seed(1)  # fix the random seed to 1
    x_assess = np.random.randn(5, 3)
    y_assess = np.random.randn(2, 3)
    return x_assess, y_assess

x_assess, y_assess = layer_size_test_case()
(n_x, n_h, n_y) = layer_sizes(x_assess, y_assess)
print("n_x = " + str(n_x))
print("n_h = " + str(n_h))
print("n_y = " + str(n_y))
```
output:
```
n_x = 5
n_h = 4
n_y = 2
```
---
(2) Initialize the parameters
```python
def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(2)
    W1 = np.random.randn(n_h, n_x)  # random init breaks symmetry between hidden units
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h)
    b2 = np.zeros((n_y, 1))
    assert(W1.shape == (n_h, n_x))
    assert(b1.shape == (n_h, 1))
    assert(W2.shape == (n_y, n_h))
    assert(b2.shape == (n_y, 1))
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters
```
```python
parameters = initialize_parameters(n_x, n_h, n_y)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))
```
output:
```
W1 = [[-0.41675785 -0.05626683 -2.1361961   1.64027081 -1.79343559]
 [-0.84174737  0.50288142 -1.24528809 -1.05795222 -0.90900761]
 [ 0.55145404  2.29220801  0.04153939 -1.11792545  0.53905832]
 [-0.5961597  -0.0191305   1.17500122 -0.74787095  0.00902525]]
b1 = [[ 0.]
 [ 0.]
 [ 0.]
 [ 0.]]
W2 = [[-0.87810789 -0.15643417  0.25657045 -0.98877905]
 [-0.33882197 -0.23618403 -0.63765501 -1.18761229]]
b2 = [[ 0.]
 [ 0.]]
```
(3) Forward propagation
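With one hidden layer, the forward pass computes the following (this matches the code below: tanh in the hidden layer, a sigmoid at the output):

$$Z^{[1]} = W^{[1]}X + b^{[1]}, \qquad A^{[1]} = \tanh\big(Z^{[1]}\big)$$

$$Z^{[2]} = W^{[2]}A^{[1]} + b^{[2]}, \qquad A^{[2]} = \sigma\big(Z^{[2]}\big) = \frac{1}{1+e^{-Z^{[2]}}}$$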
The test-case helper:

```python
def forward_propagation_test_case():  # test whether forward_propagation() is correct
    np.random.seed(1)
    x_assess = np.random.randn(2, 3)
    parameters = {'W1': np.array([[-0.00416758, -0.00056267],
                                  [-0.02136196,  0.01640271],
                                  [-0.01793436, -0.00841747],
                                  [ 0.00502881, -0.01245288]]),
                  'W2': np.array([[-0.01057952, -0.00909008,  0.00551454,  0.02292208]]),
                  'b1': np.array([[ 0.],
                                  [ 0.],
                                  [ 0.],
                                  [ 0.]]),
                  'b2': np.array([[ 0.]])}
    return x_assess, parameters
```
```python
def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    Z1 = np.dot(W1, X) + b1
    A1 = (np.exp(Z1) - np.exp(-Z1))/(np.exp(Z1) + np.exp(-Z1))  # tanh activation (equivalent to np.tanh(Z1))
    assert(A1.shape == (W1.shape[0], X.shape[1]))  # use W1.shape[0] rather than the global n_h
    Z2 = np.dot(W2, A1) + b2
    A2 = 1/(1 + np.exp(-Z2))  # sigmoid output
    assert(A2.shape == (1, X.shape[1]))
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache  # also return A2, since nn_model below unpacks "A2, cache"
```
```python
x_assess, parameters = forward_propagation_test_case()
A2, cache = forward_propagation(x_assess, parameters)
print(np.mean(cache['Z1']), np.mean(cache['A1']), np.mean(cache['Z2']), np.mean(cache['A2']))
```
output:
```
(-0.00049975577774199131, -0.00049696335323178595, 0.00043818745095914593, 0.50010954685243103)
```
(4) Next, compute the cost function:
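The code implements the cross-entropy cost, equation (13) of the assignment:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\Big(y^{(i)}\log a^{[2](i)} + \big(1-y^{(i)}\big)\log\big(1-a^{[2](i)}\big)\Big)$$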
```python
def compute_cost(A2, Y_assess, parameters):
    m = Y_assess.shape[1]
    # cross-entropy cost, equation (13) of the assignment
    cost = (-1.0/m)*np.sum(np.multiply(np.log(A2), Y_assess) + np.multiply(np.log(1-A2), 1-Y_assess))
    cost = float(np.squeeze(cost))  # turn the 0-d array into a plain Python float
    assert(isinstance(cost, float))
    return cost
```
```python
def compute_cost_test_case():  # test whether compute_cost is correct
    np.random.seed(1)
    Y_assess = np.random.randn(1, 3)
    parameters = {'W1': np.array([[-0.00416758, -0.00056267],
                                  [-0.02136196,  0.01640271],
                                  [-0.01793436, -0.00841747],
                                  [ 0.00502881, -0.01245288]]),
                  'W2': np.array([[-0.01057952, -0.00909008,  0.00551454,  0.02292208]]),
                  'b1': np.array([[ 0.],
                                  [ 0.],
                                  [ 0.],
                                  [ 0.]]),
                  'b2': np.array([[ 0.]])}
    a2 = np.array([[ 0.5002307 ,  0.49985831,  0.50023963]])
    return a2, Y_assess, parameters
```
```python
A2, Y_assess, parameters = compute_cost_test_case()
cost = compute_cost(A2, Y_assess, parameters)
print("cost = " + str(cost))
```
output:
```
cost = 0.692919893776
```
(5) Backward propagation

Backpropagation is the hardest part of deep learning. The backpropagation formulas are as follows:
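(Reconstructed here from the code below; $\circ$ denotes element-wise multiplication, and $1-(A^{[1]})^{2}$ is the derivative of tanh.)

$$dZ^{[2]} = A^{[2]} - Y, \qquad dW^{[2]} = \frac{1}{m}\,dZ^{[2]}A^{[1]T}, \qquad db^{[2]} = \frac{1}{m}\sum_{i} dZ^{[2](i)}$$

$$dZ^{[1]} = W^{[2]T}dZ^{[2]} \circ \Big(1 - \big(A^{[1]}\big)^{2}\Big), \qquad dW^{[1]} = \frac{1}{m}\,dZ^{[1]}X^{T}, \qquad db^{[1]} = \frac{1}{m}\sum_{i} dZ^{[1](i)}$$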
```python
def backward_propagation(parameters, cache, X, Y):
    m = X.shape[1]
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    dZ2 = A2 - Y
    dW2 = (1.0/m)*np.dot(dZ2, A1.T)
    db2 = (1.0/m)*np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.dot(W2.T, dZ2)*(1 - np.power(A1, 2))  # (1 - A1^2) is the derivative of tanh
    dW1 = (1.0/m)*np.dot(dZ1, X.T)
    db1 = (1.0/m)*np.sum(dZ1, axis=1, keepdims=True)
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads
```
(6) Update the parameters
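Each parameter takes a gradient descent step against its gradient, with learning rate $\alpha$ (1.2 by default in the code below):

$$\theta := \theta - \alpha\, d\theta, \qquad \theta \in \big\{W^{[1]}, b^{[1]}, W^{[2]}, b^{[2]}\big\}$$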
```python
def update_parameters(parameters, grads, learning_rates=1.2):
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    b1 = parameters["b1"]
    b2 = parameters["b2"]
    dW1 = grads["dW1"]
    dW2 = grads["dW2"]
    db1 = grads["db1"]
    db2 = grads["db2"]
    W1 = W1 - learning_rates*dW1
    b1 = b1 - learning_rates*db1
    W2 = W2 - learning_rates*dW2
    b2 = b2 - learning_rates*db2
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters
```
(7) Integrate the parts above in nn_model
```python
def nn_model(X, Y, n_h, num_iterations=10000, learning_rates=1.2, print_cost=False):
    # learning_rates is exposed as an argument so the sweep in section (8) can vary it
    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]
    parameters = initialize_parameters(n_x, n_h, n_y)  # initialize the parameters
    costs = []  # record the cost of every iteration
    for i in range(0, num_iterations):
        A2, cache = forward_propagation(X, parameters)  # forward propagation
        cost = compute_cost(A2, Y, parameters)          # pass parameters to match the signature above
        costs.append(cost)
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rates=learning_rates)
        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
    return parameters, costs
```
```python
def predict(parameters, X):  # predict labels with the trained parameters
    A2, cache = forward_propagation(X, parameters)
    predictions = (A2 > 0.5)  # threshold the sigmoid output at 0.5
    return predictions
```
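The cost trace below was presumably produced by a call along these lines (a minimal sketch; the exact driver code is not shown in the original):

```python
# Hypothetical driver: train on the flower dataset and report training accuracy,
# reusing nn_model and predict defined above.
parameters, costs = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)
predictions = predict(parameters, X)
accuracy = float((np.dot(Y, predictions.T) + np.dot(1-Y, 1-predictions.T))/float(Y.size)*100)
print("Accuracy: {} %".format(accuracy))
```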
output:
```
Cost after iteration 0: 1.127380
Cost after iteration 1000: 0.288553
Cost after iteration 2000: 0.276386
Cost after iteration 3000: 0.268077
Cost after iteration 4000: 0.263069
Cost after iteration 5000: 0.259617
Cost after iteration 6000: 0.257070
Cost after iteration 7000: 0.255105
Cost after iteration 8000: 0.253534
Cost after iteration 9000: 0.252245
```
(8) Test different learning rates and hidden layer sizes
```python
learning_rate = [0.01, 0.05, 0.1, 1.5, 2.0]  # learning rates to test
cost_dic = {}
parameter_dic = {}
for i in learning_rate:
    parameter_dic[str(i)], cost_dic[str(i)] = nn_model(X, Y, n_h=4, num_iterations=10000, learning_rates=i, print_cost=False)
for i in learning_rate:
    plt.plot(np.squeeze(cost_dic[str(i)]), label=str(i) + " learning rate")
plt.xlabel('iteration')  # call the function; assigning to plt.xlabel would shadow it
plt.ylabel('cost')
legend = plt.legend(loc='upper center', shadow=True)
frame = legend.get_frame()
frame.set_facecolor('0.90')
plt.show()
```
```python
plt.figure(figsize=(16, 32))  # enlarge the displayed figure
hidden_layer_sizes = [1, 2, 3, 4, 5, 20, 50]  # hidden layer sizes to test
for i, n_h in enumerate(hidden_layer_sizes):
    plt.subplot(5, 4, i+1)
    tic = time.time()
    parameters, costs = nn_model(X, Y, n_h, num_iterations=5000)
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
    toc = time.time()
    predictions = predict(parameters, X)
    time_consumption = toc - tic
    accuracy_per = float((np.dot(Y, predictions.T) + np.dot(1-Y, 1-predictions.T))/float(Y.size)*100)
    plt.title('Layer Size {size}, accuracy: {precision} %, time: {time} s'.format(
        size=n_h, precision=accuracy_per, time=round(time_consumption, 3)))
    print("Accuracy for {} hidden units: {} %".format(n_h, accuracy_per)
          + ", time consumption: " + str(round(time_consumption, 3)) + "s")
plt.show()
```
As the plots show, within the tested range a larger learning rate converges faster.

As the number of hidden units increases, the training time necessarily grows, but the accuracy does not necessarily improve: with too many units the network starts to overfit, which hurts performance on unseen test data.
5. Performance on other datasets
```python
def load_extra_datasets():  # generate several other dataset types
    N = 200
    noisy_circles = sklearn.datasets.make_circles(n_samples=N, factor=.5, noise=.3)
    noisy_moons = sklearn.datasets.make_moons(n_samples=N, noise=.2)
    blobs = sklearn.datasets.make_blobs(n_samples=N, random_state=5, n_features=2, centers=6)
    gaussian_quantiles = sklearn.datasets.make_gaussian_quantiles(mean=None, cov=0.5, n_samples=N, n_features=2, n_classes=2, shuffle=True, random_state=None)
    no_structure = np.random.rand(N, 2), np.random.rand(N, 2)
    return noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure
```
```python
noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure = load_extra_datasets()
datasets = {"noisy_circles": noisy_circles,
            "noisy_moons": noisy_moons,
            "blobs": blobs,
            "gaussian_quantiles": gaussian_quantiles}  # the generated datasets, by name

### START CODE HERE ### (choose your dataset)
dataset = "noisy_moons"
### END CODE HERE ###

X, Y = datasets[dataset]
X, Y = X.T, Y.reshape(1, Y.shape[0])  # convert to the (features, m) / (1, m) layout used above
```
The data distributions generated by each of the types above are shown in the original post's figure (omitted here).

Pick one of the distributions (noisy_moons is used here) and test the algorithm above on it; the resulting decision boundary is shown in the original post's figure (omitted here).
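A minimal driver for this experiment could look like the following sketch, reusing the functions defined earlier (the exact call is not shown in the original):

```python
# Hypothetical driver: retrain the 4-unit network on the selected dataset
# and visualize its decision boundary.
parameters, costs = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)
plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
plt.title("Hidden layer of size 4 on " + dataset)
plt.show()
```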