
Machine Learning in Action - Logistic Regression


Logistic regression: simply put, it takes the linear regression model and passes its output through a Sigmoid function, so the result can be read as the probability of belonging to class 1.
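A minimal sketch of that idea (my own illustration, not code from the original post; the names sigmoid and predict here are just placeholders): z = w·x is the linear part, and the sigmoid squashes z into (0, 1); a value of at least 0.5 is treated as class 1.

import numpy as np

def sigmoid(z):
    # maps any real number into the open interval (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

def predict(weights, x):
    # linear-regression part: z = w . x, then the sigmoid turns z into a probability
    prob = sigmoid(np.dot(weights, x))
    return 1 if prob >= 0.5 else 0

print(predict(np.array([0.0, 1.0, -1.0]), np.array([1.0, 2.0, 0.5])))  # prints 1 (sigmoid(1.5) ≈ 0.82)

The full code from the post follows.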

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Load the data set: each line of the file is "x1 x2 label";
# a constant 1.0 is prepended so that weights[0] acts as the intercept.
def loadData(filename):
    dataMat = []
    labelMat = []
    with open(filename) as f:
        for line in f.readlines():
            line = line.strip().split()
            dataMat.append([1.0, float(line[0]), float(line[1])])
            labelMat.append(int(line[2]))
    return dataMat, labelMat

# Plot the data set
def plot(dataMat, labelMat):
    x0cord1 = []
    x0cord2 = []
    x1cord1 = []
    x1cord2 = []
    n = len(labelMat)
    for i in range(n):
        if labelMat[i] == 1:
            x0cord1.append(dataMat[i][1])
            x0cord2.append(dataMat[i][2])
        else:
            x1cord1.append(dataMat[i][1])
            x1cord2.append(dataMat[i][2])
    plt.scatter(x0cord1, x0cord2, c='red', s=20, alpha=0.5, marker='s')
    plt.scatter(x1cord1, x1cord2, c='green', s=20, alpha=0.5)
    plt.title('DataSet')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()

# Train LR with (improved) stochastic gradient ascent
def stogradAscent(dataMat, labelMat, num_iter=150):
    dataMat = np.array(dataMat)
    m, n = np.shape(dataMat)              # m = number of samples (100), n = number of features (3)
    weights = np.ones(n)                  # parameters to optimize, initialized to [1, 1, 1]
    weights_array = np.array([])
    for j in range(num_iter):
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 1 / (i + j + 1.0) + 0.001                 # step size shrinks as training proceeds
            rangeIndex = int(np.random.uniform(0, len(dataIndex)))
            sampleIndex = dataIndex[rangeIndex]               # pick a sample not yet used in this pass
            error = labelMat[sampleIndex] - sigmoid(sum(dataMat[sampleIndex] * weights))  # a scalar
            weights = weights + alpha * dataMat[sampleIndex] * error
            weights_array = np.append(weights_array, weights, axis=0)
            del(dataIndex[rangeIndex])
    weights_array = weights_array.reshape(num_iter * m, n)
    return weights, weights_array

# Train LR with batch gradient ascent
def gradAscent(dataMat, labelMat):
    dataMatrix = np.mat(dataMat)
    labelMatrix = np.mat(labelMat).transpose()
    n = np.shape(dataMatrix)[1]           # number of columns of the matrix
    weights = np.ones((n, 1))             # parameters to optimize
    alpha = 0.001
    maxiter = 500
    weights_array = np.array([])
    for i in range(maxiter):
        error = labelMatrix - sigmoid(dataMatrix * weights)   # a 100x1 column vector
        weights = weights + alpha * dataMatrix.transpose() * error
        weights_array = np.append(weights_array, weights)
    weights_array = weights_array.reshape(maxiter, n)
    return np.asarray(weights), weights_array

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Plot how each regression coefficient changes with the number of iterations
def plotWeights(weights_array1, weights_array2):
    # SimSun font (Windows-specific path); only needed if Chinese labels are used
    font = FontProperties(fname=r"C:\Windows\Fonts\simsun.ttc", size=14)
    # Split the figure into a 3x2 grid of axes that share neither x nor y;
    # axs[0][0] is the first row, first column
    fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False, figsize=(20, 10))
    x1 = np.arange(0, len(weights_array1), 1)
    # w0 vs. number of iterations (batch gradient ascent)
    axs[0][0].plot(x1, weights_array1[:, 0])
    axs0_title_text = axs[0][0].set_title(u'Gradient ascent: coefficients vs. iterations', fontproperties=font)
    axs0_ylabel_text = axs[0][0].set_ylabel(u'W0', fontproperties=font)
    plt.setp(axs0_title_text, size=20, weight='bold', color='black')
    plt.setp(axs0_ylabel_text, size=20, weight='bold', color='black')
    # w1 vs. number of iterations
    axs[1][0].plot(x1, weights_array1[:, 1])
    axs1_ylabel_text = axs[1][0].set_ylabel(u'W1', fontproperties=font)
    plt.setp(axs1_ylabel_text, size=20, weight='bold', color='black')
    # w2 vs. number of iterations
    axs[2][0].plot(x1, weights_array1[:, 2])
    axs2_xlabel_text = axs[2][0].set_xlabel(u'Iterations', fontproperties=font)
    axs2_ylabel_text = axs[2][0].set_ylabel(u'W2', fontproperties=font)
    plt.setp(axs2_xlabel_text, size=20, weight='bold', color='black')
    plt.setp(axs2_ylabel_text, size=20, weight='bold', color='black')

    x2 = np.arange(0, len(weights_array2), 1)
    # w0 vs. number of iterations (improved stochastic gradient ascent)
    axs[0][1].plot(x2, weights_array2[:, 0])
    axs0_title_text = axs[0][1].set_title(u'Improved stochastic gradient ascent: coefficients vs. iterations', fontproperties=font)
    axs0_ylabel_text = axs[0][1].set_ylabel(u'W0', fontproperties=font)
    plt.setp(axs0_title_text, size=20, weight='bold', color='black')
    plt.setp(axs0_ylabel_text, size=20, weight='bold', color='black')
    # w1 vs. number of iterations
    axs[1][1].plot(x2, weights_array2[:, 1])
    axs1_ylabel_text = axs[1][1].set_ylabel(u'W1', fontproperties=font)
    plt.setp(axs1_ylabel_text, size=20, weight='bold', color='black')
    # w2 vs. number of iterations
    axs[2][1].plot(x2, weights_array2[:, 2])
    axs2_xlabel_text = axs[2][1].set_xlabel(u'Iterations', fontproperties=font)
    axs2_ylabel_text = axs[2][1].set_ylabel(u'W2', fontproperties=font)
    plt.setp(axs2_xlabel_text, size=20, weight='bold', color='black')
    plt.setp(axs2_ylabel_text, size=20, weight='bold', color='black')
    plt.show()

# Plot the data set together with the fitted decision boundary
def plotBestFit(weights, dataMat, labelMat):
    x0cord1 = []
    x0cord2 = []
    x1cord1 = []
    x1cord2 = []
    n = len(labelMat)
    for i in range(n):
        if labelMat[i] == 1:
            x0cord1.append(dataMat[i][1])
            x0cord2.append(dataMat[i][2])
        else:
            x1cord1.append(dataMat[i][1])
            x1cord2.append(dataMat[i][2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x0cord1, x0cord2, c='red', s=20, alpha=0.5, marker='s')
    ax.scatter(x1cord1, x1cord2, c='green', s=20, alpha=0.5)
    # plt.title('DataSet')
    # plt.xlabel('x1')
    # plt.ylabel('x2')
    # plt.show()
    # Decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w1*x1 - w0) / w2
    w = -weights[1] / weights[2]
    b = -weights[0] / weights[2]
    x = np.arange(-3, 3, 0.1)
    y = w * x + b
    ax.plot(x, y)
    plt.show()

if __name__ == '__main__':
    dataMat, labelMat = loadData('testSet.txt')
    # print(dataMat)
    # plot(dataMat, labelMat)
    weights, weights_array1 = stogradAscent(dataMat, labelMat)
    plotBestFit(weights, dataMat, labelMat)
    print(weights)
    weights2, weights_array2 = gradAscent(dataMat, labelMat)
    # print(weights2)
    plotWeights(weights_array2, weights_array1)
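Once training has finished, the learned weights can be used to classify new points. The classifyVector helper below is my own addition rather than part of the original post; it is a minimal sketch that assumes the [1.0, x1, x2] feature layout produced by loadData and reuses sigmoid and the numpy import from the listing above.

def classifyVector(inX, weights):
    # inX uses the same layout as a row of dataMat: [1.0, x1, x2]
    prob = sigmoid(sum(np.array(inX) * weights))
    return 1 if prob > 0.5 else 0

# Example: training accuracy of the weights learned by stogradAscent
# correct = sum(classifyVector(dataMat[i], weights) == labelMat[i] for i in range(len(labelMat)))
# print('training accuracy: %.1f%%' % (100.0 * correct / len(labelMat)))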
