小白學習機器學習---第三章(2):對數機率迴歸python實現
阿新 • • 發佈:2019-01-31
上程式碼~~~~~~~~
"""Logistic regression (log-odds regression) trained with batch gradient
descent on the watermelon 3.0-alpha dataset (density / sugar content),
following Zhou Zhihua's "Machine Learning", chapter 3."""
import numpy as np


def loadDataSet():
    """Build the fixed watermelon dataset.

    Returns:
        dataMatrix: list of [1.0, density, sugar] rows. The leading 1.0
            folds the bias b into the weight vector, i.e. W = (b; w), so
            the model is y = W0*1 + W1*x1 + W2*x2 = X * W.
        labelMatrix: list of int labels (1 = positive, 0 = negative).
    """
    myArr = [[0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1],
             [0.608, 0.318, 1], [0.556, 0.215, 1], [0.403, 0.211, 1],
             [0.481, 0.149, 1], [0.437, 0.211, 1],
             [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0],
             [0.343, 0.099, 0], [0.639, 0.161, 0], [0.657, 0.198, 0],
             [0.360, 0.370, 0], [0.593, 0.042, 0]]
    dataMatrix = [[1.0, float(row[0]), float(row[1])] for row in myArr]
    labelMatrix = [int(row[2]) for row in myArr]
    return dataMatrix, labelMatrix


def logistic(x):
    """Element-wise sigmoid: 1 / (1 + e^-x)."""
    return 1.0 / (1 + np.exp(-x))


def gradAscent(dataIn, classLabels):
    """Fit the weight vector by batch gradient descent.

    (Gradient ascent and descent are interchangeable up to a sign flip;
    this implementation minimizes the log-loss, hence the minus update.)

    Args:
        dataIn: rows of [1.0, feature1, feature2].
        classLabels: sequence of 0/1 labels.

    Returns:
        np.matrix of shape (n, 1) with the learned weights.
    """
    dataMatrix = np.mat(dataIn)
    # Label vector transposed into an (m, 1) column matrix.
    labelMatrix = np.mat(classLabels).transpose()
    m, n = np.shape(dataMatrix)
    alpha = 0.001      # learning-rate (step size)
    maxCycle = 500     # number of gradient steps
    weights = np.ones((n, 1))  # initialize all weights to 1
    for _ in range(maxCycle):
        h = logistic(dataMatrix * weights)   # predictions for all samples
        error = h - labelMatrix
        # W <- W - alpha * X^T (h - y): alpha times the gradient of the
        # log-loss with respect to the weight matrix.
        weights = weights - alpha * dataMatrix.transpose() * error
    return weights


def plotBestFit(weights, labelMatrix, dataIn=None):
    """Scatter the samples and draw the logistic(0.5) decision line.

    The line is ln(y/(1-y)) = 0, i.e. X*W = 0, solved for x2:
    x2 = (-w0 - w1*x1) / w2.

    Args:
        weights: (3, 1) weight matrix from gradAscent.
        labelMatrix: 0/1 labels, parallel to the data rows.
        dataIn: rows of [1.0, x1, x2]; defaults to the module-level
            dataMatrix built by the __main__ driver (kept for backward
            compatibility with the original two-argument call).
    """
    import matplotlib.pyplot as plt
    if dataIn is None:
        dataIn = dataMatrix  # fall back to the module-level global
    dataArr = np.array(dataIn)       # matrix -> plain ndarray for indexing
    n = np.shape(dataArr)[0]         # number of samples (rows)
    xcord1 = []; ycord1 = []         # positive class points
    xcord2 = []; ycord2 = []         # negative class points
    for i in range(n):
        if int(labelMatrix[i]) == 1:
            xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2])
    # Scatter the samples in one subplot.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    ax.scatter(0.719, 0.103, s=30, c='blue')  # extra reference point
    # Decision boundary: result must be flattened to a 1-D array that
    # matches the size of x before plotting.
    x = np.arange(0, 1, 0.001)
    y = np.array((-weights[0] - weights[1] * x) / weights[2])[0]
    ax.plot(x, y)
    plt.xlabel('密度'); plt.ylabel('含糖率')
    plt.show()


if __name__ == "__main__":
    dataMatrix, labelMatrix = loadDataSet()
    weights = gradAscent(dataMatrix, labelMatrix)
    plotBestFit(weights, labelMatrix, dataMatrix)