
Softmax Implementation in Python (with Test)

import numpy as np
import math
from matplotlib import pyplot as plt
from sklearn import datasets

#Compute the hypothesis ("relative probability") distribution; subtract the maximum to avoid overflow in the exponentials.
#dataset: m*(n+1)    theta: k*(n+1)    m: number of samples    n: number of features    k: number of classes
def Hypothesis(theta,dataset):
    score=np.dot(theta,dataset.T)
    a=np.max(score,axis=0)
    exp_score=np.exp(score-a)
    sum_score=np.sum(exp_score,axis=0)
    relative_probability=exp_score/sum_score
    return relative_probability
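
Because the column-wise maximum is subtracted from score before exponentiation, the exponentials cannot overflow even for large scores. A quick sanity check on made-up numbers (theta_demo and data_demo are illustrative only, not from the original post):

theta_demo = np.array([[0.5, -1.0, 2.0],
                       [1.5,  0.2, 0.3]])      #k=2 classes, n+1=3 parameters each
data_demo = np.array([[1.0, 2.0, 1.0],
                      [3.0, 0.5, 1.0]])        #m=2 samples, bias column already appended
p = Hypothesis(theta_demo, data_demo)
print(p.shape)             #(2, 2): k*m
print(np.sum(p, axis=0))   #[1. 1.]: each column is a probability distribution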

#Compute the loss function
#theta is the parameter matrix, k*(n+1)
def Cost_function(theta,dataset,labels,lamda):
    m,n=dataset.shape
    new_code=One_hot_encode(labels)
    log_probability = np.log(Hypothesis(theta,dataset))
    cost = -1/m * np.sum(np.multiply(log_probability,new_code)) + lamda/2 * np.sum(theta**2)  #cross-entropy plus squared L2 penalty, consistent with the lamda*theta term in the gradient
    return cost
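
For reference, the quantity Cost_function computes is the cross-entropy with a squared L2 penalty on the parameters:

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\sum_{j=1}^{k} 1\{y_i = j\}\,\log\frac{e^{\theta_j^{T} x_i}}{\sum_{l=1}^{k} e^{\theta_l^{T} x_i}} + \frac{\lambda}{2}\sum_{j,s}\theta_{js}^{2}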

#One-hot encode the labels
#new_code is k*m, where k is the number of classes and m is the number of samples
def One_hot_encode(labels):
    m=len(labels)
    k=len(np.unique(labels))
    new_code=np.zeros((k,m))
    for i in range(m):
        new_code[labels[i],i]=1
    return new_code
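
A small illustrative call (the label vector is made up; One_hot_encode assumes the labels are integers 0..k-1, which holds for the iris targets used below):

print(One_hot_encode(np.array([0, 2, 1, 0])))
#[[1. 0. 0. 1.]
# [0. 0. 1. 0.]
# [0. 1. 0. 0.]]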

#Gradient checking: compare the analytic gradient with a numerical approximation of the cost's partial derivatives
def Gradient_checking(gradient,theta,EPSILON,eps,dataset,labels,lamda):
    theta_vector = theta.ravel()  #flatten the parameter matrix into a vector
    num = len(theta_vector)
    vector = np.zeros(num)
    approximate_gradient = np.zeros(num)
    for i in range(num):
        vector[i] = 1
        #perturb one parameter at a time to approximate the partial derivative by a central difference
        theta_plus = theta_vector + EPSILON * vector
        theta_minus = theta_vector - EPSILON * vector
        approximate_gradient[i] = (Cost_function(theta_plus.reshape(theta.shape),dataset,labels,lamda)-\
                                   Cost_function(theta_minus.reshape(theta.shape),dataset,labels,lamda))/(2*EPSILON)
        vector[i] = 0
        if abs(approximate_gradient[i]-gradient[i]) > eps:
            return False
    #also check the relative error between the full approximate and analytic gradient vectors
    if np.linalg.norm(approximate_gradient-gradient,ord=2)/(np.linalg.norm(approximate_gradient,ord=2)) > eps:
        return False
    return True
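
Gradient_checking expects the analytic gradient flattened in the same order as theta.ravel(). A minimal usage sketch on toy data (X_check, y_check, theta_check and the tolerances are my own choices, not from the original post); the analytic gradient of the loss above is -1/m*(Y-P)*X + lamda*theta:

rng = np.random.RandomState(0)
X_check = np.column_stack((rng.rand(6, 2), np.ones(6)))   #6 samples, 2 features plus bias column
y_check = np.array([0, 1, 2, 0, 1, 2])
theta_check = rng.rand(3, 3)                              #k=3 classes, n+1=3 parameters each
temp = One_hot_encode(y_check) - Hypothesis(theta_check, X_check)
g = -1/len(y_check) * np.dot(temp, X_check) + 0.1 * theta_check   #analytic gradient, k*(n+1)
print(Gradient_checking(g.ravel(), theta_check, EPSILON=1e-5, eps=1e-4,
                        dataset=X_check, labels=y_check, lamda=0.1))  #expected: True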

#Optimize the loss function with batch gradient descent
#Stopping conditions: 1. the maximum number of iterations is reached  2. the change in the gradient between iterations is below a small threshold  3. the change in the loss between iterations is below a small threshold
#dataset: raw data, m*n    labels: class labels    lamda: regularization coefficient    learning_rate: learning rate    max_iter: maximum number of iterations
#eps1: threshold on the change in the loss    eps2: threshold on the change in the gradient
def SoftmaxRegression(dataset,labels,lamda,learning_rate,max_iter,eps1,eps2,EPS):
    loss_record=[]
    m,n = dataset.shape
    k = len(np.unique(labels))
    new_code = One_hot_encode(labels)
    iter = 0
    cost = 0
    dataset=np.column_stack((dataset,np.ones(m)))  #append a constant column for the bias term
    theta = np.random.random((k,n+1))
    while iter < max_iter:
        new_theta = theta.copy()
        temp = new_code - Hypothesis(new_theta,dataset)  #k*m matrix of (indicator - probability)
        for j in range(k):
            grad_sum = np.zeros(n+1)
            for i in range(m):
                grad_sum += dataset[i,:] * temp[j,i]
            j_gradient = -1/m * grad_sum + lamda * new_theta[j,:]  #gradient for the parameters of class j
            new_theta[j,:] = new_theta[j,:] - learning_rate * j_gradient
        iter += 1
        print("Parameters after iteration " + str(iter) + ":")
        print(new_theta)
        new_cost = Cost_function(new_theta,dataset,labels,lamda)
        loss_record.append(new_cost)
        print("Change in the loss: " + str(abs(new_cost-cost)))
        theta = new_theta
        if abs(new_cost-cost) < eps1:
            break
        cost = new_cost
    return theta,loss_record
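
The per-class double loop above is equivalent to a single matrix product. Purely as an illustrative sketch (the name SoftmaxRegression_vectorized and its reduced argument list are mine, not the original author's version), the same batch update can be written as:

def SoftmaxRegression_vectorized(dataset, labels, lamda, learning_rate, max_iter, eps1):
    loss_record = []
    m, n = dataset.shape
    k = len(np.unique(labels))
    new_code = One_hot_encode(labels)
    dataset = np.column_stack((dataset, np.ones(m)))            #append the bias column
    theta = np.random.random((k, n+1))
    cost = 0
    for it in range(max_iter):
        temp = new_code - Hypothesis(theta, dataset)             #k*m
        gradient = -1/m * np.dot(temp, dataset) + lamda * theta  #k*(n+1), same result as the double loop
        theta = theta - learning_rate * gradient
        new_cost = Cost_function(theta, dataset, labels, lamda)
        loss_record.append(new_cost)
        if abs(new_cost - cost) < eps1:
            break
        cost = new_cost
    return theta, loss_record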

#Predict class labels: append the bias column, then take the class with the highest relative probability for each sample
def Classification(theta,dataset):
    X=dataset.copy()
    X=np.column_stack((X,np.ones(X.shape[0])))
    relative_probability=Hypothesis(theta,X)
    return np.argmax(relative_probability,axis=0)

Test:

iris= datasets.load_iris()
X=iris.data
y = iris.target
target_names = iris.target_names

theta,loss_record=SoftmaxRegression(dataset=X,labels=y,lamda=0.1,learning_rate=1e-4,max_iter=500000,eps1=1e-6,eps2=1e-4,EPS=1e-6)
predict=Classification(theta,X)
(predict==y).astype(int).mean()  #accuracy on the training set
plt.plot(np.arange(len(loss_record)),loss_record)

The loss curve over the iterations is shown below (apologies that the axes are not clearly labeled; I did not feel like redrawing the plot):