機器學習之支援向量機演算法例項
阿新 • • 發佈:2019-02-06
此例項是利用 SVM 演算法預測乳腺癌腫瘤是良性還是惡性。資料格式如下圖所示:第 1 列表示編號,第 2 到 10 列表示資料屬性,第 11 列表示腫瘤標籤(2 表示良性,4 表示惡性)。
程式碼如下
"""Predict whether a breast tumour is benign or malignant with a linear SVM.

Input file layout (comma separated, one header row): the first column is the
sample id, columns 2-10 are the features, and column 11 is the tumour label
(2 = benign, 4 = malignant).  Some rows contain "?" in place of a value; those
rows are dropped before training.
"""

import numpy as np
import pandas

# Minimal SVC usage, kept for reference:
#   x = [[2, 0], [1, 1], [2, 3]]      # feature vectors
#   y = [0, 0, 1]                     # labels
#   clf = svm.SVC(kernel="linear")
#   clf.fit(x, y)
#   print(clf)
#   print(clf.support_vectors_)       # coordinates of the support vectors
#   print(clf.support_)               # indices of the support vectors
#   print(clf.n_support_)             # number of support vectors
#   print(clf.predict([[5, 5]]))      # predict (input must be 2-D)

RAW_DATA_PATH = "breast_cancer_wisconsin.txt"
# The cleaned file is written to and read back from this single location, so
# the two steps can no longer disagree about where it lives.
CLEAN_DATA_PATH = "test1.csv"
MISSING_MARKER = "?"


def drop_incomplete_rows(
    frame: pandas.DataFrame, missing_marker: str = MISSING_MARKER
) -> pandas.DataFrame:
    """Return the rows of *frame* in which no cell equals *missing_marker*.

    Rows are filtered with a boolean mask rather than looked up by sample id:
    an id-based lookup duplicates rows (and can re-select incomplete ones)
    whenever the same id occurs more than once.

    Args:
        frame: Table whose cells were read as strings.
        missing_marker: Cell value that marks a missing measurement.

    Returns:
        A new DataFrame with the incomplete rows removed; column order and
        the relative order of the remaining rows are unchanged.
    """
    has_missing = frame.eq(missing_marker).any(axis=1)
    return frame.loc[~has_missing]


def split_features_labels(table: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Split a 2-D table into ``(features, labels)``.

    The first column (sample id) is discarded, the last column is the label,
    and everything in between is the feature matrix.
    """
    return table[:, 1:-1], table[:, -1]


def main() -> None:
    """Clean the raw data, train a linear SVM and print its evaluation."""
    # scikit-learn is imported here so that the cleaning helpers above remain
    # importable in environments where it is not installed.
    from sklearn import svm
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    # `sklearn.cross_validation` was removed; the splitter lives here now.
    from sklearn.model_selection import train_test_split

    # ----------------- remove the rows that contain "?" -----------------
    raw = pandas.read_csv(RAW_DATA_PATH, dtype="str")
    clean = drop_incomplete_rows(raw)
    clean.to_csv(CLEAN_DATA_PATH, encoding="utf-8", index=False)
    # ----------------------- end of preprocessing -----------------------

    table = np.genfromtxt(
        CLEAN_DATA_PATH, delimiter=",", dtype="int", skip_header=1
    )
    X, y = split_features_labels(table)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    clf = svm.SVC(kernel="linear")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    print(clf.support_vectors_)  # coordinates of the support vectors
    print(clf.n_support_)  # number of support vectors
    print(clf.support_)  # indices of the support vectors
    # predict() expects a 2-D array: one row per sample.
    print(clf.predict([[1, 2, 4, 8, 3, 7, 3, 6, 4]]))  # predict


if __name__ == "__main__":
    main()
預測結果如下圖: