小白學習機器學習---第三章(2):對數機率迴歸 Python 實現

上程式碼:

### Logistic regression fitted by gradient descent
def loadDataSet():
    """Load the hard-coded watermelon 3.0a sample set.

    Returns:
        dataMatrix: list of [1.0, feature1, feature2] rows; the leading 1.0
            absorbs the bias term, so y = w0*1 + w1*x1 + w2*x2 = X*W.
        labelMatrix: list of int labels taken from the third column
            (presumably 1/0 class membership — matches how callers use it).
    """
    rawRows = [[0.697,0.460,1],[0.774,0.376,1],[0.634,0.264,1],[0.608,0.318,1],[0.556,0.215,1],[0.403,0.211,1],[0.481,0.149,1],[0.437,0.211,1],
        [0.666,0.091,0],[0.243,0.267,0],[0.245,0.057,0],[0.343,0.099,0],[0.639,0.161,0],[0.657,0.198,0],[0.360,0.370,0],[0.593,0.042,0]]
    dataMatrix = [[1.0, float(row[0]), float(row[1])] for row in rawRows]
    labelMatrix = [int(row[2]) for row in rawRows]
    return dataMatrix, labelMatrix

def logistic(x):
    """Sigmoid 1/(1+e^-x); works element-wise on numpy arrays/matrices."""
    denom = 1 + exp(-x)
    return 1.0 / denom

def gradAscent(dataIn, classLabels, alpha=0.001, maxCycle=500):
    """Fit logistic-regression weights by full-batch gradient descent.

    (The name is kept for backward compatibility; with the minus sign on the
    update it is in fact gradient *descent* on the log-loss.)

    Args:
        dataIn: list of feature rows shaped [1.0, x1, x2, ...] (leading 1.0
            is the bias column).
        classLabels: list of 0/1 labels aligned with dataIn.
        alpha: learning rate / step size (default 0.001, as before).
        maxCycle: number of gradient steps (default 500, as before).

    Returns:
        (n, 1) numpy matrix of weights [w0; w1; ...; wd].
    """
    dataMatrix = mat(dataIn)
    labelMatrix = mat(classLabels).transpose()  # labels as a column vector
    _, n = shape(dataMatrix)
    weights = ones((n, 1))  # start from an all-ones weight vector
    for _ in range(maxCycle):
        h = logistic(dataMatrix * weights)  # predicted probabilities, (m, 1)
        error = h - labelMatrix
        # X^T * (h - y) is the gradient of the log-loss w.r.t. the weights.
        weights = weights - alpha * dataMatrix.transpose() * error
    return weights

def plotBestFit(weights, labelMatrix, dataMat=None):
    """Scatter the two classes and draw the fitted decision boundary.

    The line plotted is w0 + w1*x1 + w2*x2 = 0, i.e. where the logistic
    output equals 0.5.

    Args:
        weights: (3, 1) numpy matrix of weights from gradAscent.
        labelMatrix: list of 0/1 labels aligned with the data rows.
        dataMat: optional list of [1.0, x1, x2] rows. When omitted, falls
            back to the module-level ``dataMatrix`` global for backward
            compatibility with the original code, which read it directly.
    """
    import matplotlib.pyplot as plt
    if dataMat is None:
        dataMat = dataMatrix  # original behavior: use the script-level global
    dataArr = array(dataMat)
    n = shape(dataArr)[0]  # number of samples

    # Split sample coordinates by class for the two scatter series.
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if int(labelMatrix[i]) == 1:
            xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    # Extra highlighted point kept from the original script (purpose not
    # stated there — presumably a query sample to classify; TODO confirm).
    ax.scatter(0.719, 0.103, s=30, c='blue')

    # Decision boundary: solve w0 + w1*x + w2*y = 0 for y over x in [0, 1).
    x = arange(0, 1, 0.001)
    # weights is a matrix, so index [0] converts the result row to a 1-D
    # array matching x's length.
    y = array((-weights[0] - weights[1] * x) / weights[2])[0]
    ax.plot(x, y)
    plt.xlabel('密度'); plt.ylabel('含糖率')
    plt.show()
if __name__ == "__main__":
    # Train on the sample data and visualize the decision boundary.
    # Kept at module scope (no main() wrapper) because plotBestFit falls
    # back to the module-level ``dataMatrix`` global when called without data.
    dataMatrix, labelMatrix = loadDataSet()
    weights = gradAscent(dataMatrix, labelMatrix)
    plotBestFit(weights, labelMatrix)