機器學習之LDA線性判別分析模型
阿新 • • 發佈:2018-11-23
- 機器學習之LDA線性判別分析模型
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 21 21:03:14 2018

@author: muli

Linear Discriminant Analysis (LDA) experiments on the iris data set with
scikit-learn: a basic fit/score run, a 3-D scatter plot of the per-class
linear scores, and the effect of the ``solver`` and ``shrinkage``
parameters on test accuracy.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, discriminant_analysis
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
# ``train_test_split`` now lives in ``sklearn.model_selection``.
from sklearn.model_selection import train_test_split


def load_data():
    """Load the iris data set and split it for a classification task.

    The split is stratified on the class label, uses a fixed
    ``random_state=0`` and holds out 1/4 of the samples for testing.

    :return: a tuple ``(X_train, X_test, y_train, y_test)``: training
        samples, test samples, training labels, test labels.
    """
    # Use the iris data set bundled with scikit-learn.
    iris = datasets.load_iris()
    features = iris.data
    labels = iris.target
    # Stratified split so every class keeps its proportion in both parts.
    return train_test_split(features, labels, test_size=0.25,
                            random_state=0, stratify=labels)


def test_LinearDiscriminantAnalysis(*data):
    """Fit a default LinearDiscriminantAnalysis model and print its results.

    :param data: variadic arguments; exactly four are expected, in order:
        training samples, test samples, training labels, test labels.
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    # Build and train the LDA model.
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)
    # Print the learned weights (W) and intercepts (b).
    print('Coefficients:%s, intercept %s' % (lda.coef_, lda.intercept_))
    # Print the prediction accuracy on the test set.
    print('Score: %.2f' % lda.score(X_test, y_test))


def plot_LDA(converted_X, y):
    """Draw a 3-D scatter plot of LDA-converted samples, one style per class.

    Only the labels 0, 1 and 2 and the first three columns of
    ``converted_X`` are plotted.

    :param converted_X: 2-D array of converted samples with at least
        three columns.
    :param y: labels of the samples (1-D, or a column vector).
    :return: None
    """
    # Importing Axes3D registers the '3d' projection on old matplotlib
    # versions; newer versions register it automatically.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    fig = plt.figure()
    # Recent matplotlib releases no longer attach an ``Axes3D(fig)`` to the
    # figure automatically, which leaves the plot empty; ``add_subplot``
    # with ``projection='3d'`` works on both old and new versions.
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    y = np.asarray(y)
    colors = 'rgb'
    markers = 'o*s'
    for target, color, marker in zip([0, 1, 2], colors, markers):
        # Boolean row mask of the samples that belong to this class.
        pos = (y == target).ravel()
        X = converted_X[pos, :]
        ax.scatter(X[:, 0], X[:, 1], X[:, 2], color=color, marker=marker,
                   label="Label %d" % target)
    ax.legend(loc="best")
    fig.suptitle("Iris After LDA")
    plt.show()


def run_plot_LDA():
    """Fit LDA on the whole iris data set and plot the converted samples.

    The data comes from :func:`load_data`; the train and test parts are
    merged again before fitting. The plotted coordinates are the linear
    scores ``X @ coef_.T + intercept_`` (one column per class), not the
    output of ``lda.transform``.

    :return: None
    """
    X_train, X_test, y_train, y_test = load_data()
    X = np.vstack((X_train, X_test))
    # Keep the labels 1-D: passing an (n, 1) column vector to ``fit``
    # triggers scikit-learn's DataConversionWarning.
    Y = np.concatenate((y_train, y_test))
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X, Y)
    converted_X = np.dot(X, np.transpose(lda.coef_)) + lda.intercept_
    plot_LDA(converted_X, Y)


def test_LinearDiscriminantAnalysis_solver(*data):
    """Print the test accuracy of LDA for each available ``solver``.

    :param data: variadic arguments; exactly four are expected, in order:
        training samples, test samples, training labels, test labels.
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    solvers = ['svd', 'lsqr', 'eigen']
    for solver in solvers:
        # ``shrinkage`` is left at its default (None) for every solver,
        # which is what the original per-solver branches amounted to.
        lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
        lda.fit(X_train, y_train)
        print('Score at solver=%s: %.2f' % (solver, lda.score(X_test, y_test)))


def _score_with_shrinkage(shrinkage, X_train, X_test, y_train, y_test):
    """Return the test accuracy of an 'lsqr' LDA model with ``shrinkage``."""
    lda = discriminant_analysis.LinearDiscriminantAnalysis(
        solver='lsqr', shrinkage=shrinkage)
    lda.fit(X_train, y_train)
    return lda.score(X_test, y_test)


def test_LinearDiscriminantAnalysis_shrinkage(*data):
    """Plot the test accuracy of LDA against the ``shrinkage`` parameter.

    Twenty evenly spaced shrinkage values in [0, 1] are evaluated with the
    'lsqr' solver.

    :param data: variadic arguments; exactly four are expected, in order:
        training samples, test samples, training labels, test labels.
    :return: None
    """
    X_train, X_test, y_train, y_test = data
    shrinkages = np.linspace(0.0, 1.0, num=20)
    scores = [
        _score_with_shrinkage(shrinkage, X_train, X_test, y_train, y_test)
        for shrinkage in shrinkages
    ]
    # Plot accuracy versus shrinkage.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(shrinkages, scores)
    ax.set_xlabel(r"shrinkage")
    ax.set_ylabel(r"score")
    ax.set_ylim(0, 1.05)
    ax.set_title("LinearDiscriminantAnalysis")
    plt.show()


def main():
    """Load the data and run the selected experiment."""
    # Build the classification data set.
    X_train, X_test, y_train, y_test = load_data()
    # Other experiments that can be enabled instead:
    # test_LinearDiscriminantAnalysis(X_train, X_test, y_train, y_test)
    # run_plot_LDA()
    # test_LinearDiscriminantAnalysis_solver(X_train, X_test, y_train, y_test)
    test_LinearDiscriminantAnalysis_shrinkage(X_train, X_test, y_train, y_test)


if __name__ == '__main__':
    main()