1. 程式人生 > >scikit-learn幾種常用演算法的比較(code)

scikit-learn幾種常用演算法的比較(code)

from sklearn import datasets
import numpy as np
# Load the iris dataset and keep only feature columns 2 and 3, so every
# sample can be drawn on a two-dimensional plot.
iris = datasets.load_iris()
X, y = iris.data[:, [2, 3]], iris.target
# Distinct class labels stored in iris.target. As a bare expression the value
# is only echoed when this line is run in an interactive session.
np.unique(y)

# Randomly split the feature matrix X and the label vector y into a test set
# and a training set at a 3:7 ratio (test_size=0.3); random_state pins the
# shuffle so the split is reproducible.
# The old `sklearn.cross_validation` module no longer exists in scikit-learn;
# `train_test_split` is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Standardize the features with StandardScaler from sklearn.preprocessing.
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

# Train a perceptron on the standardized training data.
from sklearn.linear_model import Perceptron
# The `n_iter` keyword is no longer accepted by Perceptron. `max_iter=40`
# together with `tol=None` (no early stopping on the loss) keeps the original
# meaning of exactly 40 passes over the training data.
ppn = Perceptron(max_iter=40, tol=None, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

# Predict the test split and report how many samples were misclassified.
y_pred = ppn.predict(X_test_std)
print('Misclassified Samples:%d' %(y_test != y_pred).sum())


#繪製訓練模型的決策區域
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

#繪製決策區域函式的定義
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.01):
    """Draw the decision regions of a fitted classifier over two features.

    The function draws on the current matplotlib figure and returns nothing.

    Parameters
    ----------
    X : array indexed as ``X[:, 0]`` / ``X[:, 1]``
        Samples; column 0 is plotted on the x axis, column 1 on the y axis.
    y : array
        Class label of each row of ``X``. At most five distinct labels are
        supported, because only five markers/colours are defined.
    classifier : object with a ``predict`` method
        Called with an ``(n_points, 2)`` array of grid coordinates.
    test_idx : index sequence or None
        Rows of ``X`` to highlight as test samples; nothing is highlighted
        when it is ``None`` or empty.
    resolution : float
        Step of the grid on which the classifier is evaluated.
    """
    # Marker and colour per class, in the order returned by np.unique(y).
    # ('lightred' in the original table is not a valid matplotlib colour.)
    markers = ('s', 'x', 'o', '^', '*')
    colors = ('red', 'cyan', 'blue', 'yellow', 'lightcoral')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Evaluate the classifier on a grid that extends one unit beyond the data
    # range in every direction, then paint the predicted class per grid cell.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Plot every sample, one scatter call per class.
    for idx, c1 in enumerate(classes):
        mask = y == c1
        # `color=` is used instead of `c=`: a single RGBA tuple passed as `c`
        # is ambiguous (it can be read as a sequence of scalar values).
        plt.scatter(x=X[mask, 0], y=X[mask, 1],
                    alpha=0.8, color=cmap(idx),
                    marker=markers[idx], label=c1)

    # Highlight the test samples with hollow circles. The explicit None/len
    # check (instead of plain truthiness) also accepts numpy index arrays.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[test_idx, :]
        # facecolors='none' with an explicit edge colour replaces the former
        # c='' trick, which current matplotlib does not accept.
        plt.scatter(X_test[:, 0], X_test[:, 1],
                    facecolors='none', edgecolors='black',
                    alpha=1.0, linewidth=1, marker='o',
                    s=55, label='test label')

# Re-join the standardized splits (training rows first, test rows after them)
# so that a single array pair can be handed to the plotting function.
X_combined_std, y_combined = (
    np.vstack([X_train_std, X_test_std]),
    np.hstack([y_train, y_test]),
)

#LR
from sklearn.linear_model import LogisticRegression
#樸素貝葉斯:
from sklearn.naive_bayes import GaussianNB
#K-近鄰:
from sklearn.neighbors import KNeighborsClassifier
#決策樹:
from sklearn.tree import DecisionTreeClassifier
#支援向量機:
from sklearn import svm

# Fit each classifier on the standardized training split.
lr = LogisticRegression(C=1000)
lr.fit(X_train_std, y_train)

gnb = GaussianNB()
gnb.fit(X_train_std, y_train)

knc = KNeighborsClassifier()
knc.fit(X_train_std, y_train)

dt = DecisionTreeClassifier()
dt.fit(X_train_std, y_train)

# LinearSVC (a classifier) replaces the original LinearSVR, which is a
# regressor: its predictions are continuous values rather than class labels,
# so it does not belong in a comparison of classifiers. The variable name is
# kept unchanged for any code that refers to it.
svmLR = svm.LinearSVC()
svmLR.fit(X_train_std, y_train)

# In the combined arrays the training rows come first, so the test samples
# occupy the rows after them (105..149 for this 105/45 split).
test_rows = range(len(X_train_std), len(X_combined_std))

# One figure per classifier. Axis labels and the legend are set inside the
# loop so that every figure gets them, not only the last one created.
# NOTE(review): the axis labels are CJK text - confirm that the configured
# matplotlib font can render them.
for title, model in (
        ('LogisticRegression', lr),
        ('GaussianNB', gnb),
        ('KNeighborsClassifier', knc),
        ('DecisionTreeClassifier', dt),
        ('svm.LinearSVC', svmLR),
):
    plt.figure()
    plot_decision_regions(X=X_combined_std,
                          y=y_combined,
                          classifier=model,
                          test_idx=test_rows)
    plt.title(title)
    plt.xlabel("長度")
    plt.ylabel("寬度")
    plt.legend(loc='upper left')

plt.show()

以上程式碼是在 Python 3.6 上執行的。由於時間緊迫,暫時還沒有對結果進行分析,之後會再補充修改。