1. 程式人生 > >支援向量機——非線性分類SVM

支援向量機——非線性分類SVM

模型原型
sklearn.svm.SVC(C=1.0,kernel='rbf',degree=3,gamma='auto',coef0=0.0,shrinking=True,probability=False,tol=0.001,cache_size=200,class_weight=None,verbose=False,max_iter=-1,decision_function_shape=None,random_state=None)
引數

  • C
  • kernel
  • degree
  • gamma
  • coef0
  • shrinking:是否使用啟發式收縮(shrinking heuristic)
  • probability:是否進行概率估計(必須在訓練之前設定好,且概率估計會拖慢訓練速度)
  • tol
  • cache_size:指定了kernel cache的大小,單位為MB
  • class_weight
  • verbose
  • max_iter
  • decision_function_shape:指定決策函式的形狀
    • 'ovr':使用one-vs-rest準則,決策函式的形狀是(n_samples,n_classes)
    • 'ovo':使用one-vs-one準則,決策函式的形狀是(n_samples,n_classes*(n_classes-1)/2)
    • None:預設值
  • random_state

屬性

  • support_:一個數組,形狀為[n_SV],支援向量的下標
  • support_vectors_:一個數組,形狀為[n_SV,n_features],支援向量
  • n_support_:一個類數組(array-like),形狀為[n_class],每一個分類的支援向量的個數
  • dual_coef_:一個數組,形狀為[n_class-1,n_SV] (對偶問題中,在分類決策函式中每個支援向量的係數)
  • coef_:一個數組,形狀為[n_class*(n_class-1)/2,n_features] (原始問題中,每個特徵的係數,只在linear
    kernel中有效)
  • intercept_:一個數組,形狀為[n_class*(n_class-1)/2],決策函式中的常數項

方法

  • fit(X,y[,sample_weight])
  • predict(X)
  • score(X,y[,sample_weight])
  • predict_log_proba(X)
  • predict_proba(X)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model,cross_validation,svm

載入資料

def load_data_classfication():
    """Load the iris dataset and split it into stratified train/test sets.

    Returns:
        The sequence ``X_train, X_test, y_train, y_test`` produced by
        ``train_test_split`` with ``test_size=0.25``, ``random_state=0`` and
        stratification on the class labels.
    """
    # ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and
    # removed in 0.20. Prefer ``sklearn.model_selection`` and only fall back
    # to the legacy module on installations that predate it.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    iris = datasets.load_iris()
    features = iris.data
    labels = iris.target
    return train_test_split(
        features, labels, test_size=0.25, random_state=0, stratify=labels)

不同的核的影響

# Linear kernel
def test_SVC_linear(*data):
    """Fit a linear-kernel SVC and print its coefficients, intercept and test score.

    Args:
        *data: Exactly four positional values, unpacked in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_X, test_X, train_y, test_y = data

    classifier = svm.SVC(kernel='linear')
    classifier.fit(train_X, train_y)

    weights = classifier.coef_
    bias = classifier.intercept_
    print('Coefficients:%s,\nintercept %s' % (weights, bias))

    test_score = classifier.score(test_X, test_y)
    print('Score:%.2f' % test_score)

# Build the train/test split once at module level, then run the linear-kernel
# experiment on it.
X_train, X_test, y_train, y_test = load_data_classfication()
test_SVC_linear(X_train, X_test, y_train, y_test)
# Polynomial kernel
def test_SVC_poly(*data):
    """Plot how degree, gamma and coef0 affect a polynomial-kernel SVC.

    One figure with three side-by-side panels is shown. Each panel sweeps a
    single hyper-parameter, refits ``svm.SVC(kernel='poly', ...)`` for every
    value and plots the training and testing scores against that value.

    Args:
        *data: Exactly four positional values, unpacked in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()

    def _draw_panel(index, values, params_for, title, xlabel, train_marker):
        """Fit one poly SVC per value and plot both score curves in panel `index`."""
        train_scores = []
        test_scores = []
        for value in values:
            cls = svm.SVC(kernel='poly', **params_for(value))
            cls.fit(X_train, y_train)
            train_scores.append(cls.score(X_train, y_train))
            test_scores.append(cls.score(X_test, y_test))
        ax = fig.add_subplot(1, 3, index)
        ax.plot(values, train_scores, label='Training score',
                marker=train_marker)
        # Same legend text in every panel (the r panel used to read
        # 'Testing scores').
        ax.plot(values, test_scores, label='Testing score', marker='o')
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('score')
        ax.set_ylim(0, 1.05)
        ax.legend(loc='best', framealpha=0.5)

    # Panel 1: vary the polynomial degree, all other parameters at defaults.
    _draw_panel(1, range(1, 20),
                lambda degree: {'degree': degree},
                'SVC_poly_degree', 'p', 'x')

    # Panel 2: vary gamma with the degree fixed at 3.
    _draw_panel(2, range(1, 20),
                lambda gamma: {'gamma': gamma, 'degree': 3},
                'SVC_poly_gamma', r'$\gamma$', '+')

    # Panel 3: vary coef0 (r) with gamma=10 and degree=3.
    _draw_panel(3, range(20),
                lambda r: {'gamma': 10, 'degree': 3, 'coef0': r},
                'SVC_poly_r', r'r', '+')

    plt.show()

# Run the polynomial-kernel experiment on the module-level train/test split.
test_SVC_poly(X_train, X_test, y_train, y_test)
# Gaussian (RBF) kernel
def test_SVC_rbf(*data):
    """Sweep gamma for an RBF-kernel SVC and plot training/testing scores.

    A classifier is fitted for every integer gamma in ``range(1, 20)``; the
    two score curves are then drawn on a single-panel figure and shown.

    Args:
        *data: Exactly four positional values, unpacked in the order
            ``X_train, X_test, y_train, y_test``.
    """
    train_X, test_X, train_y, test_y = data
    gamma_values = range(1, 20)

    # Collect (train score, test score) per gamma, then split into two curves.
    score_pairs = []
    for g in gamma_values:
        model = svm.SVC(kernel='rbf', gamma=g)
        model.fit(train_X, train_y)
        score_pairs.append(
            (model.score(train_X, train_y), model.score(test_X, test_y)))
    train_curve = [pair[0] for pair in score_pairs]
    test_curve = [pair[1] for pair in score_pairs]

    axes = plt.figure().add_subplot(1, 1, 1)
    axes.plot(gamma_values, train_curve, label="Training score", marker='+')
    axes.plot(gamma_values, test_curve, label='Testing score', marker='o')
    axes.set_title('SVC_rbf')
    axes.set_xlabel(r'$\gamma$')
    axes.set_ylabel('score')
    axes.set_ylim(0, 1.05)
    axes.legend(loc='best', framealpha=0.5)
    plt.show()

# Run the RBF-kernel experiment on the module-level train/test split.
test_SVC_rbf(X_train, X_test, y_train, y_test)
# Sigmoid kernel
def test_SVC_sigmoid(*data):
    """Plot how gamma and coef0 affect a sigmoid-kernel SVC.

    One figure with two side-by-side panels is shown. Each panel sweeps a
    single hyper-parameter, refits ``svm.SVC(kernel='sigmoid', ...)`` for
    every value and plots the training and testing scores against it.

    Args:
        *data: Exactly four positional values, unpacked in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()

    def _draw_panel(index, values, params_for, title, xlabel, log_x=False):
        """Fit one sigmoid SVC per value and plot both score curves in panel `index`."""
        train_scores = []
        test_scores = []
        for value in values:
            cls = svm.SVC(kernel='sigmoid', **params_for(value))
            cls.fit(X_train, y_train)
            train_scores.append(cls.score(X_train, y_train))
            test_scores.append(cls.score(X_test, y_test))
        ax = fig.add_subplot(1, 2, index)
        ax.plot(values, train_scores, label='Training score', marker='+')
        # Same legend text in every panel (the gamma panel used to read
        # 'testing score').
        ax.plot(values, test_scores, label='Testing score', marker='o')
        ax.set_title(title)
        if log_x:
            # The swept values are log-spaced, so use a log x-axis.
            ax.set_xscale('log')
        ax.set_xlabel(xlabel)
        ax.set_ylabel('score')
        ax.set_ylim(0, 1.05)
        ax.legend(loc='best', framealpha=0.5)

    # Panel 1: vary gamma over np.logspace(-2, 1) with coef0 fixed at 0.
    _draw_panel(1, np.logspace(-2, 1),
                lambda gamma: {'gamma': gamma, 'coef0': 0},
                'SVC_sigmoid_gammas', r'$\gamma$', log_x=True)

    # Panel 2: vary coef0 (r) over np.linspace(0, 5) with gamma fixed at 0.01.
    _draw_panel(2, np.linspace(0, 5),
                lambda r: {'coef0': r, 'gamma': 0.01},
                'SVC_sigmoid_r', r'r')

    plt.show()

# Run the sigmoid-kernel experiment on the module-level train/test split.
test_SVC_sigmoid(X_train, X_test, y_train, y_test)