Softmax Implementation (Python, with Tests)
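The quantity that Hypothesis computes below is the softmax distribution over the k classes. Subtracting the per-sample maximum score before exponentiating leaves the probabilities unchanged but prevents overflow in the exponentials, which is the trick the code relies on:

$$P(y=j \mid x;\theta)=\frac{e^{\theta_j^{T}x}}{\sum_{l=1}^{k}e^{\theta_l^{T}x}}=\frac{e^{\theta_j^{T}x-a}}{\sum_{l=1}^{k}e^{\theta_l^{T}x-a}},\qquad a=\max_{l}\theta_l^{T}x.$$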
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets

# Compute the hypothesis's "relative probability" distribution; subtract the
# column-wise maximum before exponentiating to guard against overflow.
# dataset: m*(n+1)  theta: k*(n+1)
# m: number of samples  n: number of features  k: number of label classes
def Hypothesis(theta, dataset):
    score = np.dot(theta, dataset.T)
    a = np.max(score, axis=0)
    exp_score = np.exp(score - a)
    sum_score = np.sum(exp_score, axis=0)
    relative_probability = exp_score / sum_score
    return relative_probability

# Compute the loss function.
# theta is the k*(n+1) parameter matrix.
def Cost_function(theta, dataset, labels, lamda):
    m, n = dataset.shape
    new_code = One_hot_encode(labels)
    log_probability = np.log(Hypothesis(theta, dataset))
    # cross-entropy plus an L2 penalty lamda/2 * ||theta||^2, which matches the
    # lamda * theta term used in the gradient below
    cost = -1 / m * np.sum(np.multiply(log_probability, new_code)) + lamda / 2 * np.sum(theta ** 2)
    return cost

# One-hot encode the labels.
# new_code is k*m, where k is the number of classes and m the number of samples.
def One_hot_encode(labels):
    m = len(labels)
    k = len(np.unique(labels))
    new_code = np.zeros((k, m))
    for i in range(m):
        new_code[labels[i], i] = 1
    return new_code

# Gradient checking: compare the analytic gradient (flattened in the same order
# as theta.ravel()) against centered finite differences of the cost.
def Gradient_checking(gradient, theta, EPSILON, eps, dataset, labels, lamda):
    theta_vector = theta.ravel()          # vectorize the parameter matrix
    num = len(theta_vector)
    vector = np.zeros(num)
    approximate_gradient = np.zeros(num)
    for i in range(num):
        vector[i] = 1
        # perturb one parameter at a time to approximate that partial derivative
        theta_plus = theta_vector + EPSILON * vector
        theta_minus = theta_vector - EPSILON * vector
        approximate_gradient[i] = (Cost_function(theta_plus.reshape(theta.shape), dataset, labels, lamda) -
                                   Cost_function(theta_minus.reshape(theta.shape), dataset, labels, lamda)) / (2 * EPSILON)
        vector[i] = 0
        if abs(approximate_gradient[i] - gradient[i]) > eps:
            return False
    # also require a small relative error between the two gradient vectors
    if np.linalg.norm(approximate_gradient - gradient, ord=2) / np.linalg.norm(approximate_gradient, ord=2) > eps:
        return False
    return True

# Optimize the loss function with batch gradient descent.
# Stopping conditions: 1) the maximum number of iterations is reached,
# 2) the change in the loss between two iterations falls below eps1.
# dataset: raw data, m*n   labels: targets   lamda: regularization coefficient
# learning_rate: step size   max_iter: iteration cap
# eps1: threshold on the change in loss   eps2, EPS: accepted but unused here
def SoftmaxRegression(dataset, labels, lamda, learning_rate, max_iter, eps1, eps2, EPS):
    loss_record = []
    m, n = dataset.shape
    k = len(np.unique(labels))
    new_code = One_hot_encode(labels)
    it = 0
    cost = 0
    dataset = np.column_stack((dataset, np.ones(m)))   # append a bias column
    theta = np.random.random((k, n + 1))
    while it < max_iter:
        new_theta = theta.copy()
        temp = new_code - Hypothesis(new_theta, dataset)   # k*m residual matrix
        for j in range(k):
            grad_sum = np.zeros(n + 1)
            for i in range(m):
                grad_sum += dataset[i, :] * temp[j, i]
            # gradient vector for the class-j parameter row
            j_gradient = -1 / m * grad_sum + lamda * new_theta[j, :]
            new_theta[j, :] = new_theta[j, :] - learning_rate * j_gradient
        it += 1
        new_cost = Cost_function(new_theta, dataset, labels, lamda)
        loss_record.append(new_cost)
        if it % 1000 == 0:   # print progress sparingly; max_iter may be very large
            print("Parameters after iteration " + str(it) + ":")
            print(new_theta)
            print("Change in loss: " + str(abs(new_cost - cost)))
        if abs(new_cost - cost) < eps1:
            break
        cost = new_cost
        theta = new_theta
    return theta, loss_record
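The double loop over classes and samples above is easy to read but slow on larger datasets. The same batch gradient can be computed with a single matrix product; here is a minimal sketch (batch_gradient is my own helper name, not part of the original code):

def batch_gradient(theta, dataset, new_code, lamda):
    # k*m residual between the one-hot labels and the predicted probabilities
    m = dataset.shape[0]
    temp = new_code - Hypothesis(theta, dataset)
    # (k*m) @ (m*(n+1)) yields the full k*(n+1) gradient matrix at once
    return -1 / m * np.dot(temp, dataset) + lamda * theta

Inside the training loop, theta = theta - learning_rate * batch_gradient(theta, dataset, new_code, lamda) produces the same update as the nested loops, up to floating-point rounding.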
# Predict class labels: append the bias column and take the most probable class.
def Classification(theta, dataset):
    X = dataset.copy()
    X = np.column_stack((X, np.ones(X.shape[0])))
    relative_probability = Hypothesis(theta, X)
    return np.argmax(relative_probability, axis=0)
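Gradient_checking is defined above but never exercised in the test below; a quick smoke test on a small random problem could look like this (the sizes and tolerances are illustrative, and batch_gradient is the vectorized sketch from above):

rng = np.random.RandomState(0)
X_small = np.column_stack((rng.rand(20, 2), np.ones(20)))   # 20 samples, 2 features plus a bias column
y_small = rng.randint(0, 3, 20)                             # labels from 3 classes (assumed all present)
theta0 = rng.rand(3, 3)
analytic = batch_gradient(theta0, X_small, One_hot_encode(y_small), 0.1)
print(Gradient_checking(analytic.ravel(), theta0, EPSILON=1e-5, eps=1e-4,
                        dataset=X_small, labels=y_small, lamda=0.1))   # expect True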
Test:
iris = datasets.load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names
theta, loss_record = SoftmaxRegression(dataset=X, labels=y, lamda=0.1, learning_rate=1e-4,
                                       max_iter=500000, eps1=1e-6, eps2=1e-4, EPS=1e-6)
predict = Classification(theta, X)
print((predict == y).astype(int).mean())   # accuracy on the training set
plt.plot(np.arange(len(loss_record)), loss_record)
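As a sanity check, scikit-learn's built-in multinomial logistic regression reaches comparable training accuracy on iris; the hyperparameters below are illustrative, and the numbers will not match exactly since the solver and regularization differ:

from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter=1000).fit(X, y)
print(clf.score(X, y))   # training accuracy, for comparison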
The loss curve over the iterations looks like this (forgive the unlabeled axes, I didn't feel like polishing the plot):
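If you do want labeled axes, adding these calls after the plt.plot line above is enough (the label text is my own choice):

plt.xlabel("iteration")
plt.ylabel("loss")
plt.show()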