scikit-learn幾種常用演算法的比較(code)
阿新 • • 發佈:2018-11-09
"""Comparison of several common scikit-learn classifiers on the iris dataset.

Trains a Perceptron, LogisticRegression, GaussianNB, KNeighborsClassifier,
DecisionTreeClassifier and LinearSVC on two iris features (petal length and
petal width, columns 2 and 3), then plots each model's decision regions.
"""
from sklearn import datasets
import numpy as np

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # keep only petal length / petal width
y = iris.target
np.unique(y)  # the class labels stored in iris.target: {0, 1, 2}

# Randomly split the feature matrix X and label vector y into 70% training
# data and 30% test data.
# FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Standardize the features: fit the scaler on the training data only, then
# apply the same transformation to both training and test sets.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Train a perceptron.
# FIX: the n_iter parameter was renamed max_iter in scikit-learn 0.21.
from sklearn.linear_model import Perceptron
ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

# Predict the test set and count the misclassified samples.
y_pred = ppn.predict(X_test_std)
print('Misclassified Samples:%d' % (y_test != y_pred).sum())

# Plotting helpers for the decision regions of a trained model.
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.01):
    """Plot the 2-D decision regions of a fitted classifier.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 2)
        The two features to plot.
    y : ndarray of shape (n_samples,)
        Integer class labels.
    classifier : fitted estimator
        Any object with a ``predict`` method.
    test_idx : sequence of int, optional
        Row indices of ``X`` to highlight as test samples.
    resolution : float
        Grid step for the mesh on which the decision surface is evaluated.
    """
    # One marker/color per class.
    # FIX: 'lightred' is not a valid matplotlib color name and would crash
    # with a 5-class problem; use 'lightgreen' instead.
    markers = ('s', 'x', 'o', '^', '*')
    colors = ('red', 'cyan', 'blue', 'yellow', 'lightgreen')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # Evaluate the classifier on a dense grid spanning the data (+/- 1).
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Plot all samples, one scatter call per class.
    # FIX: removed the unconditional X[test_idx, :] indexing that ran even
    # when test_idx was None (it was a duplicate of the line below anyway).
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    c=cmap(idx), marker=markers[idx], label=cl)

    # Highlight the test samples as hollow circles.
    # FIX: c='' is rejected by modern matplotlib; use facecolors='none'
    # (with a visible edge color) for unfilled markers.
    if test_idx:
        X_test, y_test = X[test_idx, :], y[test_idx]
        plt.scatter(X_test[:, 0], X_test[:, 1], facecolors='none',
                    edgecolors='black', alpha=1.0, linewidth=1,
                    marker='o', s=55, label='test label')


X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

# The classifiers to compare.
from sklearn.linear_model import LogisticRegression   # logistic regression
from sklearn.naive_bayes import GaussianNB            # naive Bayes
from sklearn.neighbors import KNeighborsClassifier    # k-nearest neighbors
from sklearn.tree import DecisionTreeClassifier       # decision tree
from sklearn import svm                               # support vector machine

lr = LogisticRegression(C=1000)
lr.fit(X_train_std, y_train)
gnb = GaussianNB()
gnb.fit(X_train_std, y_train)
knc = KNeighborsClassifier()
knc.fit(X_train_std, y_train)
dt = DecisionTreeClassifier()
dt.fit(X_train_std, y_train)
# FIX: svm.LinearSVR is a *regressor* and its continuous predictions make
# no sense as decision regions; use the linear SVM classifier LinearSVC.
svc = svm.LinearSVC()
svc.fit(X_train_std, y_train)

plt.figure()
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=lr,
                      test_idx=range(105, 150))
plt.title('LogisticRegression')  # FIX: was mislabeled 'LinearRegression'

plt.figure()
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=gnb,
                      test_idx=range(105, 150))
plt.title('GaussianNB')

plt.figure()
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=knc,
                      test_idx=range(105, 150))
plt.title('KNeighborsClassifier')

plt.figure()
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=dt,
                      test_idx=range(105, 150))
plt.title('DecisionTreeClassifier')

plt.figure()
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=svc,
                      test_idx=range(105, 150))
plt.title('svm.LinearSVC')  # FIX: title follows the corrected classifier

plt.xlabel("長度")
plt.ylabel("寬度")
plt.legend(loc='upper left')
plt.show()
這些程式碼是在python3.6上執行的,時間緊促暫時沒有分析結果,後面修改!