程式人生 > 機器學習——KNN

機器學習——KNN

標籤:load、sklearn、創建數據、訓練數據

導入類庫

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# 熵增益
# 熵越大,信息量越大,蘊含的不確定性越大
KNN
1.計算待預測值到所有點的距離
2.對所有距離排序
3.找出前K個樣本裏面類別最多的類,作為待預測值的類別

代碼

 1 A = np.array([[1, 1], [1, 1.5], [0.5, 1.5]])
 2 B = np.array([[3.0, 3.0], [3.0, 3.5], [2.8, 3.1]])
 3 
 4 
 5 def knn_pre_norm(point):
6 a_len = np.linalg.norm(point - A, axis=1) 7 b_len = np.linalg.norm(point - B, axis=1) 8 print(a_len.min()) 9 print(b_len.min()) 10 11 12 def knn_predict_rev(point): 13 X = np.array([[1, 1], [1, 1.5], [0.5, 1.5], [3.0, 3.0], [3.0, 3.5], [2.8, 3.1]]) 14 Y = np.array([0, 0, 0, 1, 1, 1])
15 16 knn = KNeighborsClassifier(n_neighbors=2) 17 knn.fit(X, Y) 18 19 print(knn.predict(np.array([[1.0, 3.0]]))) 20 21 22 def iris_linear(): 23 # 加載iris數據 24 li = load_iris() 25 # 散點圖 26 # plt.scatter(li.data[:, 0], li.data[:, 1], c=li.target) 27 # plt.scatter(li.data[:, 2], li.data[:, 3], c=li.target) 28 # plt.show() 29 # 分割測試集和訓練集,測試集占整個數據集的比例是0.25 30 x_train, x_test, y_train, y_test = train_test_split(li.data, li.target, test_size=0.25) 31 # 創建KNN分類,使用最少5個鄰居作為類別判斷標準 32 knn = KNeighborsClassifier(n_neighbors=5) 33 # 訓練數據 34 knn.fit(x_train, y_train) 35 # 預測測試集 36 # print(knn.predict(x_test)) 37 # 預測np.array([[6.3, 3, 5.2, 2.3]]) 38 print(knn.predict(np.array([[6.3, 3, 5.2, 2.3]]))) 39 # 預測np.array([[6.3, 3, 5.2, 2.3]])所屬各個類別的概率 40 print(knn.predict_proba(np.array([[6.3, 3, 5.2, 2.3]]))) 41 42 43 if __name__ == __main__: 44 # knn_predict_rev(None) 45 # knn_pre_norm(np.array([2.3,2.3])) 46 iris_linear()

機器學習——KNN