A Python 3 neural network: a classic, simple example using the sigmoid activation function
Sigmoid is used as the activation function, including for the output layer (a multi-class version of logistic regression); this determines how the output-layer delta is computed.
Squared error is used as the loss function (note: this affects both the calculate_loss function and the output-layer delta computation).
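To make the interaction of these two choices concrete, here is a minimal standalone sketch (separate from the full program below): with squared-error loss L = sum((a - y)^2) / 2 and sigmoid activation a = sigmoid(z), the output-layer delta dL/dz works out to (a - y) * a * (1 - a), i.e. -(expected - actual) * a * (1 - a), which is exactly the expression used in backward() below.

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# One toy example with 2 output classes: pre-activation z, activation a, one-hot target y
z = np.array([[0.5, -1.0]])
a = sigmoid(z)
y = np.array([[1.0, 0.0]])

loss = np.sum(np.square(a - y) / 2)   # squared-error loss, as in calculate_loss
delta = -(y - a) * a * (1 - a)        # output-layer delta, as in backward
print(loss, delta)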
__author__ = 'http://clayandgithub.github.io/'

import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt


class NNModel:
    Ws = []             # params W of the whole network
    bs = []             # params b of the whole network
    layers = []         # number of nodes in each layer
    epsilon = 0.01      # default learning rate for gradient descent
    reg_lambda = 0.01   # default regularization strength

    def __init__(self, layers, epsilon=0.01, reg_lambda=0.01):
        self.layers = layers
        self.epsilon = epsilon
        self.reg_lambda = reg_lambda
        self.init_params()

    # Initialize the parameters (W and b) to random values. We need to learn these.
    def init_params(self):
        np.random.seed(0)
        layers = self.layers
        hidden_layer_num = len(layers) - 1
        Ws = [1] * hidden_layer_num
        bs = [1] * hidden_layer_num
        for i in range(0, hidden_layer_num):
            Ws[i] = np.random.randn(layers[i], layers[i + 1]) / np.sqrt(layers[i])
            bs[i] = np.zeros((1, layers[i + 1]))
        self.Ws = Ws
        self.bs = bs

    # This function learns parameters for the neural network from the training dataset
    # - num_passes: number of passes through the training data for gradient descent
    # - print_loss: if True, print the loss every 1000 iterations
    def train(self, X, y, num_passes=20000, print_loss=False):
        num_examples = len(X)
        expected_output = self.transform_output_dimension(y)
        # Gradient descent. For each batch...
        for i in range(0, num_passes):
            # Forward propagation
            a_output = self.forward(X)
            # Backpropagation
            dWs, dbs = self.backward(X, expected_output, a_output)
            # Update parameters of the model
            self.update_model_params(dWs, dbs, num_examples)
            # Optionally print the loss.
            # This is expensive because it uses the whole dataset, so we don't want to do it too often.
            if print_loss and i % 1000 == 0:
                print("Loss after iteration %i: %f" % (i, self.calculate_loss(X, expected_output)))

    # Helper function to evaluate the total loss on the dataset
    def calculate_loss(self, X, expected_output):
        output_shape = expected_output.shape
        num_output = output_shape[0]        # training set size
        dimension_output = output_shape[1]  # output dimension
        # Forward propagation to calculate our predictions
        a_output = self.forward(X)
        current_output = a_output[-1]
        # Calculate the squared-error loss
        data_loss = np.sum(np.square(current_output - expected_output) / 2)
        # Add regularization term to the loss (optional)
        for W in self.Ws:
            data_loss += self.reg_lambda / 2 * np.sum(np.square(W))
        return 1. / num_output * data_loss
    # Forward propagation
    def forward(self, X):
        Ws = self.Ws
        bs = self.bs
        hidden_layer_num = len(Ws)
        a_output = [1] * hidden_layer_num
        current_input = X
        for i in range(0, hidden_layer_num - 1):
            w_current = Ws[i]
            b_current = bs[i]
            z_current = current_input.dot(w_current) + b_current
            a_current = sigmoid(z_current)
            a_output[i] = a_current
            current_input = a_current
        # Output layer (logistic)
        z_current = current_input.dot(Ws[hidden_layer_num - 1]) + bs[hidden_layer_num - 1]
        a_current = sigmoid(z_current)
        a_output[hidden_layer_num - 1] = a_current
        return a_output

    # Predict the classification result for input x
    def predict(self, x):
        a_output = self.forward(x)
        return np.argmax(a_output[-1], axis=1)

    # Backpropagation
    def backward(self, X, expected_output, a_output):
        Ws = self.Ws
        bs = self.bs
        hidden_layer_num = len(Ws)
        num_examples = len(X)
        ds = [1] * hidden_layer_num
        # Output layer: delta for squared error + sigmoid
        a_current = a_output[-1]
        d_current = -(expected_output - a_current) * a_current * (1 - a_current)
        ds[hidden_layer_num - 1] = d_current
        # Other hidden layers
        for l in range(hidden_layer_num - 2, -1, -1):
            w_current = Ws[l + 1]
            a_current = a_output[l]
            d_current = np.dot(d_current, w_current.T) * a_current * (1 - a_current)
            ds[l] = d_current
        # Calculate dW and db
        dWs = [1] * hidden_layer_num
        dbs = [1] * hidden_layer_num
        a_last = X
        for l in range(0, hidden_layer_num):
            d_current = ds[l]
            dWs[l] = np.dot(a_last.T, d_current)
            dbs[l] = np.sum(d_current, axis=0, keepdims=True)
            a_last = a_output[l]
        return dWs, dbs

    # Update the params (Ws and bs) of the network during backpropagation
    def update_model_params(self, dWs, dbs, num_examples):
        Ws = self.Ws
        bs = self.bs
        hidden_layer_num = len(Ws)
        for l in range(0, hidden_layer_num):
            Ws[l] = Ws[l] - self.epsilon * (dWs[l] + self.reg_lambda * Ws[l])
            bs[l] = bs[l] - self.epsilon * (dbs[l])
            # Alternative: average the gradients over the batch
            # Ws[l] = Ws[l] - self.epsilon * (dWs[l] / num_examples + self.reg_lambda * Ws[l])
            # bs[l] = bs[l] - self.epsilon * (dbs[l] / num_examples)
        self.Ws = Ws
        self.bs = bs

    # Transform the label vector into an output matrix (e.g. [0, 1, 1] -> [[1, 0], [0, 1], [0, 1]])
    def transform_output_dimension(self, y):
        class_num = np.max(y) + 1  # labels start at 0
        examples_num = len(y)
        output = np.zeros((examples_num, class_num))
        output[range(examples_num), y] += 1
        return output


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def generate_data(random_seed, n_samples):
    np.random.seed(random_seed)
    X, y = datasets.make_moons(n_samples, noise=0.20)
    return X, y


def visualize(X, y, model):
    plt.title("sigmoid_squared_error_ann_classification")
    plot_decision_boundary(lambda x: model.predict(x), X, y)


def plot_decision_boundary(pred_func, X, y):
    # Set min and max values and give them some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    plt.show()


class Config:
    # Gradient descent parameters
    epsilon = 0.01       # learning rate for gradient descent
    reg_lambda = 0.01    # regularization strength
    layers = [2, 4, 2]   # number of nodes in each layer


def main():
    X, y = generate_data(6, 200)
    model = NNModel(Config.layers, Config.epsilon, Config.reg_lambda)
    model.train(X, y, print_loss=True)
    visualize(X, y, model)


if __name__ == "__main__":
    main()
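One way to convince yourself that backward() is correct is a finite-difference gradient check (a hypothetical helper, not part of the original program). Note that backward() returns the gradient of the un-normalized squared-error data loss, without the regularization term and 1/N scaling that calculate_loss adds, so the check compares against that raw loss:

def raw_data_loss(model, X, expected):
    # Squared-error data loss only: no regularization, no 1/N scaling
    return np.sum(np.square(model.forward(X)[-1] - expected) / 2)

def numeric_grad(model, X, expected, l, i, j, eps=1e-6):
    # Central difference on a single weight Ws[l][i, j]
    orig = model.Ws[l][i, j]
    model.Ws[l][i, j] = orig + eps
    loss_plus = raw_data_loss(model, X, expected)
    model.Ws[l][i, j] = orig - eps
    loss_minus = raw_data_loss(model, X, expected)
    model.Ws[l][i, j] = orig  # restore the weight
    return (loss_plus - loss_minus) / (2 * eps)

X, y = generate_data(6, 200)
model = NNModel([2, 4, 2])
expected = model.transform_output_dimension(y)
dWs, dbs = model.backward(X, expected, model.forward(X))
print(dWs[0][0, 0], numeric_grad(model, X, expected, 0, 0, 0))

The two printed numbers should agree to several decimal places; a large discrepancy would point to a bug in the delta computations.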
Result:
Loss after iteration 0: 0.251743
Loss after iteration 1000: 0.085999
Loss after iteration 2000: 0.043758
Loss after iteration 3000: 0.031237
Loss after iteration 4000: 0.028426
Loss after iteration 5000: 0.027443
Loss after iteration 6000: 0.027016
Loss after iteration 7000: 0.026799
Loss after iteration 8000: 0.026676
Loss after iteration 9000: 0.026602
Loss after iteration 10000: 0.026555
Loss after iteration 11000: 0.026525
Loss after iteration 12000: 0.026505
Loss after iteration 13000: 0.026491
Loss after iteration 14000: 0.026481
Loss after iteration 15000: 0.026474
Loss after iteration 16000: 0.026468
Loss after iteration 17000: 0.026464
Loss after iteration 18000: 0.026461
Loss after iteration 19000: 0.026458