Python Data Analysis and Machine Learning: An SVM Parameter-Tuning Example
阿新 · Published: 2019-01-31
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.svm import SVC
from sklearn.datasets import make_blobs  # sklearn.datasets.samples_generator was removed in newer scikit-learn

# X, y = make_blobs(n_samples=200, centers=2, cluster_std=0.6, random_state=0)
# plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="autumn")
# model = SVC(kernel="linear")
# model.fit(X, y)

# Plotting helper
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision function for a 2D SVC"""
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    # create grid to evaluate model
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)
    # plot decision boundary and margins
    ax.contour(X, Y, P, colors='k', levels=[-1, 0, 1],
               alpha=0.5, linestyles=['--', '-', '--'])
    # plot support vectors
    if plot_support:
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=300, linewidth=1, facecolors='none')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# plot_svc_decision_function(model)
# plt.show()

'''SVM with a kernel function'''
from sklearn.datasets import make_circles
# X, y = make_circles(n_samples=100, noise=.1, factor=.1, random_state=1)
# clf = SVC(kernel="linear").fit(X, y)
# plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
# plot_svc_decision_function(clf, plot_support=False)
# plt.show()

'''Adding a radial basis function, i.e. the Gaussian kernel'''
# clf = SVC(kernel="rbf")
# clf.fit(X, y)
# plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
# plot_svc_decision_function(clf)
# plt.show()

'''Tuning SVM parameters: the soft-margin problem'''
'''
Tuning the C parameter:
as C approaches infinity, classification is strict and no errors are allowed;
as C approaches zero, a much larger error tolerance is allowed.
'''
# X, y = make_blobs(n_samples=100, centers=2, random_state=0, cluster_std=0.8)
#
# fig, ax = plt.subplots(1, 2, figsize=(16, 6))
#
# for axi, C in zip(ax, [10.0, 0.1]):
#     model = SVC(kernel='linear', C=C).fit(X, y)
#     axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
#     plot_svc_decision_function(model, axi)
#     axi.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
#                 s=300, lw=1, facecolors='none')
#     axi.set_title('C = {0:.1f}'.format(C), size=14)
# plt.show()

'''gamma controls model complexity: the larger its value, the more complex the model'''
# X, y = make_blobs(n_samples=100, centers=2, random_state=0, cluster_std=1.1)
# fig, ax = plt.subplots(1, 2, figsize=(16, 6))
#
# for axi, gamma in zip(ax, [10.0, 0.1]):
#     model = SVC(kernel='rbf', gamma=gamma).fit(X, y)
#     axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
#     plot_svc_decision_function(model, axi)
#     axi.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
#                 s=300, lw=1, facecolors='none')
#     axi.set_title('gamma = {0:.1f}'.format(gamma), size=14)
# plt.show()

'''Example: Face Recognition'''
from sklearn.datasets import fetch_lfw_people  # the LFW (Labeled Faces in the Wild) face dataset

faces = fetch_lfw_people(min_faces_per_person=60)  # keep only people with at least 60 face images
print(faces.target_names)
print(faces.images.shape)

fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])

from sklearn.svm import SVC
# from sklearn.decomposition import RandomizedPCA  # removed from scikit-learn; use PCA instead
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

pca = PCA(n_components=150, whiten=True)  # reduce the 62x47-pixel faces to 150 components
svc = SVC(kernel='rbf', class_weight='balanced')
model = make_pipeline(pca, svc)

from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target,
                                                random_state=40)
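# Note (not from the original post): because `model` is a Pipeline, the
# hyperparameters tuned below are addressed as <step_name>__<param>.
# make_pipeline names each step after its lowercased class, so the SVC step
# is 'svc' and its parameters become 'svc__C' and 'svc__gamma'. All valid
# names can be listed with:
# print(model.get_params().keys())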
from sklearn.model_selection import GridSearchCV  # use GridSearchCV for parameter selection
param_grid = {'svc__C': [1, 5, 10],
              'svc__gamma': [0.0001, 0.0005, 0.001]}
grid = GridSearchCV(model, param_grid)
grid.fit(Xtrain, ytrain)
print(grid.best_params_)

model = grid.best_estimator_
yfit = model.predict(Xtest)
print(yfit.shape)

fig, ax = plt.subplots(4, 6)
for i, axi in enumerate(ax.flat):
    axi.imshow(Xtest[i].reshape(62, 47), cmap='bone')
    axi.set(xticks=[], yticks=[])
    axi.set_ylabel(faces.target_names[yfit[i]].split()[-1],
                   color='black' if yfit[i] == ytest[i] else 'red')
fig.suptitle('Predicted Names; Incorrect Labels in Red', size=14)

from sklearn.metrics import classification_report
print(classification_report(ytest, yfit, target_names=faces.target_names))

from sklearn.metrics import confusion_matrix  # confusion matrix
import seaborn as sns

mat = confusion_matrix(ytest, yfit)
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=faces.target_names,
            yticklabels=faces.target_names)
plt.xlabel('true label')
plt.ylabel('predicted label')
plt.show()
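GridSearchCV keeps the score of every combination it tried, not just the winner reported by best_params_. A minimal sketch for inspecting them, assuming the `grid` object fitted above (cv_results_['params'] and cv_results_['mean_test_score'] are standard GridSearchCV attributes):

# print the mean cross-validated accuracy for every (C, gamma) combination tried
for params, score in zip(grid.cv_results_['params'],
                         grid.cv_results_['mean_test_score']):
    print(params, 'mean CV accuracy: {0:.3f}'.format(score))

This makes it easy to see whether the best combination sits at the edge of the grid, in which case the search range for svc__C or svc__gamma should probably be widened.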