
Demo of the neural network gradient descent algorithm
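The script below fits a single-layer classifier (logistic regression with a sigmoid activation) to a synthetic two-blob dataset, first with plain full-batch gradient descent (run_basicSGD) and then with mini-batch SGD (run_minibatchSGD). Both variants repeat the same update: compute the prediction error sigmoid(XW) - y, form the gradient X^T * error, and step the weights as W = W - alpha * gradient, where alpha is the learning rate.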

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import argparse

# Sigmoid activation function
def sigmoid_activation(x):
	return 1.0 / (1 + np.exp(-x))
# Threshold the sigmoid outputs into class labels 0 and 1
def predict(X, W):
	preds = sigmoid_activation(X.dot(W))
	# values <= 0.5 become 0; anything still positive was > 0.5 and becomes 1
	preds[preds <= 0.5] = 0
	preds[preds > 0] = 1

	return preds

# Yield successive mini-batches of size batchSize from (X, y)
def next_batch(X, y, batchSize):
	for i in np.arange(0, X.shape[0], batchSize):
		yield (X[i:i+batchSize], y[i:i+batchSize])
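# Example: with 10 samples and batchSize=4, the generator yields batches of
# 4, 4 and 2 samples; the last batch may be smaller than batchSize.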


def run_basicSGD():
	# Parse command-line arguments
	ap = argparse.ArgumentParser()
	ap.add_argument("-e", "--epochs", type=int, default=100, help="# of epochs")
	ap.add_argument("-a", "--alpha", type=float, default=0.01, help="learning rate")
	args = vars(ap.parse_args())
	# Generate a synthetic two-class dataset
	(X, y) = make_blobs(n_samples=10000, n_features=2, centers=2, cluster_std=1.5, random_state=1)
	y = y.reshape((y.shape[0], 1))
	# Bias trick: append a column of ones to X so the bias b is folded into W and learned as part of one parameter matrix
	X = np.c_[X, np.ones((X.shape[0]))]
	# Split into training and test sets
	(trainX, testX, trainY, testY) = train_test_split(X, y, test_size=0.5, random_state=42)

	print("[INFO] training...")
	W = np.random.randn(X.shape[1], 1)
	print("initialize W = \n", W)
	losses = []
	# Train for the requested number of epochs
	for epoch in np.arange(0, args["epochs"]):
		preds = sigmoid_activation(trainX.dot(W))
		# Prediction error; the reported loss is the sum of squared errors
		error = preds - trainY
		loss = np.sum(error ** 2)
		losses.append(loss)


		# Gradient of the loss with respect to W: X^T * error
		gradient = trainX.T.dot(error)
		# Step in the negative gradient direction, scaled by the learning rate
		W += -args["alpha"] * gradient
		

		if epoch == 0 or (epoch + 1) % 5 == 0:
			print("[INFO] epoch={}, loss={:.7f}".format(int(epoch + 1), loss))

	print("[INFO] evaluating...")
	# Evaluate on the test set and print a classification report
	preds = predict(testX, W)
	print(classification_report(testY, preds))
	# Visualize the test data and the training loss
	plt.style.use("ggplot")
	plt.figure()
	plt.title("Data")
	cm_dark = mpl.colors.ListedColormap(['g', 'b'])
	plt.scatter(testX[:, 0], testX[:, 1], marker="o", c=testY.ravel(), cmap=cm_dark, s=10)
	# print(testY)

	plt.style.use("ggplot")
	plt.figure()
	plt.plot(np.arange(0, args["epochs"]), losses)
	plt.title("Training Loss")
	plt.xlabel("Epoch #")
	plt.ylabel("Loss")
	plt.show()

	print("W\n", W)

def run_minibatchSGD():
	# Parse command-line arguments
	ap = argparse.ArgumentParser()
	ap.add_argument("-e", "--epochs", type=int, default=100, help="# of epochs")
	ap.add_argument("-a", "--alpha", type=float, default=0.01, help="learning rate")
	ap.add_argument("-b", "--batch-size", type=int, default=32, help="size of the SGD mini-batches")
	args = vars(ap.parse_args())
	# Generate a synthetic two-class dataset
	(X, y) = make_blobs(n_samples=10000, n_features=2, centers=2, cluster_std=1.5, random_state=1)
	y = y.reshape((y.shape[0], 1))
	# Bias trick: append a column of ones to X so the bias b is folded into W
	X = np.c_[X, np.ones((X.shape[0]))]
	# Split into training and test sets
	(trainX, testX, trainY, testY) = train_test_split(X, y, test_size=0.5, random_state=42)
	print("[INFO] training...")
	W = np.random.randn(X.shape[1], 1)
	losses = []
	# Train for the requested number of epochs
	for epoch in np.arange(0, args["epochs"]):
		epochLoss = []

		# Iterate over the training data in mini-batches
		for (batchX, batchY) in next_batch(trainX, trainY, args["batch_size"]):
			preds = sigmoid_activation(batchX.dot(W))
			error = preds - batchY
			epochLoss.append(np.sum(error ** 2))

			# Gradient and parameter update, computed per mini-batch
			gradient = batchX.T.dot(error)
			W += -args["alpha"] * gradient

		loss = np.average(epochLoss)
		losses.append(loss)
		if epoch == 0 or (epoch+1) % 5 == 0:
			print("[INFO] epoch={}, loss={:.7f}".format(int(epoch + 1), loss))

	print("[INFO] evaluting...")
	preds = predict(testX, W)
	print(classification_report(testY, preds))

	# Visualize the test data and the training loss
	plt.style.use("ggplot")
	plt.figure()
	plt.title("Data")
	cm_dark = mpl.colors.ListedColormap(['g', 'b'])
	plt.scatter(testX[:, 0], testX[:, 1], marker="o", c=testY.ravel(), cmap=cm_dark, s=10)
	# print(testY)

	plt.style.use("ggplot")
	plt.figure()
	plt.plot(np.arange(0, args["epochs"]), losses)
	plt.title("Training Loss")
	plt.xlabel("Epoch #")
	plt.ylabel("Loss")
	plt.show()

	print("W\n", W)

if __name__ == '__main__':
	run_basicSGD()
	# run_minibatchSGD()
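To try the demo, save the script (for example as sgd_demo.py; the file name here is just a placeholder) and run it with the flags defined above, e.g. python sgd_demo.py --epochs 100 --alpha 0.01. To run the mini-batch variant instead, call run_minibatchSGD() in the __main__ block and optionally pass --batch-size 32.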

Standard SGD:

mini-batch SGD: