Sklearn__SVM實現手寫數字識別

阿新 • • 發佈：2019-01-08

1、資料準備

from sklearn.model_selection import StratifiedShuffleSplit
import pandas as pd
import numpy  as np
from sklearn.datasets import fetch_mldata

class Data_need():
	"""Load an MNIST-style dataset and turn it into binarised train/test arrays.

	Parameters
	----------
	percent : float
		Fraction of the samples placed in the test split (passed as
		``test_size`` to ``StratifiedShuffleSplit``).
	data_name : str
		Dataset name handed to ``fetch_mldata``.
	data_home : str, optional
		Directory used as the dataset cache. Defaults to the path that was
		previously hard-coded in ``get_data``.
	random_state : int, optional
		Seed for the stratified shuffle; defaults to the previously
		hard-coded value 42.

	NOTE(review): ``fetch_mldata`` was deprecated in scikit-learn 0.20 and
	removed in later releases; on newer versions ``fetch_openml`` is the
	replacement -- confirm the targeted scikit-learn version.
	"""

	# The original author's local cache directory, kept as the default so
	# existing two-argument calls behave exactly as before.
	DEFAULT_DATA_HOME = r'D:\Python_data\python Data\sklearn'

	def __init__(self, percent, data_name, data_home=None, random_state=42):
		self.percent = percent
		self.data_name = data_name
		self.data_home = self.DEFAULT_DATA_HOME if data_home is None else data_home
		self.random_state = random_state

	def get_data(self):
		"""Fetch the dataset and return ``(data, target)``."""
		mnist = fetch_mldata(self.data_name, data_home=self.data_home)
		return mnist['data'], mnist['target']

	def random_data(self, x, y):
		"""Shuffle the samples and split them into stratified train/test frames.

		Returns ``(train, test)`` DataFrames whose columns are the feature
		indices ``0..n-1`` followed by the label column ``'y'``.
		"""
		# Build one DataFrame holding the features plus the label column.
		data_y = pd.DataFrame(y, columns=['y'])
		n = len(x[0])
		data_x = pd.DataFrame(x, columns=list(range(n)))
		mnist_data = pd.merge(data_x, data_y, right_index=True, left_index=True)
		# Stratified sampling: n_splits=1 yields exactly one (train, test)
		# pair of index arrays, so take it directly instead of looping.
		split = StratifiedShuffleSplit(n_splits=1, test_size=self.percent, random_state=self.random_state)
		train_index, test_index = next(split.split(mnist_data, mnist_data['y']))
		return mnist_data.loc[train_index, :], mnist_data.loc[test_index, :]

	def train_test_data(self, train, test):
		"""Return ``(x_train, y_train, x_test, y_test)`` as NumPy arrays.

		Every column except the last is treated as a pixel feature and is
		binarised: 1 where the value is non-zero, 0 otherwise. Labels are
		read from the ``'y'`` column.
		"""
		def binarise(frame):
			return (np.array(frame.iloc[:, :-1]) != 0) * 1

		return binarise(train), np.array(train['y']), binarise(test), np.array(test['y'])


if __name__ == '__main__':
	# Hold out 30% of the samples as the test split.
	data_need = Data_need(percent=0.3, data_name='MNIST original')
	x, y = data_need.get_data()
	# Stratified shuffle into train/test frames, then binarise the pixels.
	train, test = data_need.random_data(x=x, y=y)
	x_train_in, y_train_in, x_test_in, y_test_in = data_need.train_test_data(train=train, test=test)

2、檢視資料及模型訓練

模型採用 ovr (ova) SVM 模型

from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
# Select matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')

def to_plot(num, n):
	"""Display one training image of a given digit.

	num: the digit label to draw
	n: index of the sample among the training samples carrying that label

	Reads the module-level ``x_train_in`` / ``y_train_in`` arrays and
	reshapes the selected row to 28x28 before showing it.
	"""
	pixels = x_train_in[y_train_in == num][n]
	plt.imshow(pixels.reshape(28, 28), cmap=plt.cm.binary, interpolation='nearest')
	plt.axis('off')
	plt.show()


if __name__ == '__main__' :
	to_plot(num=8, n=10)
	ova_svm_clf = LinearSVC(loss='hinge', C=5, multi_class='ovr')
	ova_svm_clf.fit(x_train_in, y_train_in)
	# Out-of-fold predictions from 3-fold cross-validation.
	y_prd = cross_val_predict(ova_svm_clf, x_train_in, y_train_in, cv=3)
	# Evaluation: confusion matrix of true labels vs. cross-validated predictions.
	conf_m = confusion_matrix(y_true=y_train_in, y_pred=y_prd)

在這裡插入圖片描述

3、模型評估


### Overall accuracy
def clf_correct(y_train, y_prd):
	"""Return the fraction of predictions in ``y_prd`` that equal ``y_train``.

	Both arguments may be NumPy arrays or plain sequences, and labels may be
	of any type that supports equality (numbers, strings, ...).

	Raises ``ValueError`` if the two inputs differ in shape and
	``ZeroDivisionError`` if they are empty.
	"""
	truth = np.asarray(y_train)
	predicted = np.asarray(y_prd)
	if truth.shape != predicted.shape:
		raise ValueError(
			"y_train and y_prd must have the same shape, got {} and {}".format(
				truth.shape, predicted.shape))
	# Count matches in native code rather than iterating in Python, and
	# compare with == so non-numeric labels work as well.
	return int(np.count_nonzero(truth == predicted)) / len(truth)


class plot_conf_m():
	"""Draw a confusion matrix as grayscale images with matplotlib.

	conf_m: square confusion matrix (rows are summed along axis 1 to
	normalise, so each row is treated as one group of samples).
	"""

	def __init__(self, conf_m):
		self.conf_m = conf_m

	def plt_conf_m(self):
		"""Draw the raw confusion matrix with ``plt.matshow``."""
		plt.matshow(self.conf_m, cmap=plt.cm.gray)

	def plt_error_conf_m(self):
		"""Draw the row-normalised confusion matrix with the diagonal zeroed.

		Zeroing the diagonal hides the correct classifications so that the
		brightest cells show which classes are confused most often.
		"""
		# Work on a float copy so list input is accepted and the stored
		# matrix is never modified by fill_diagonal below.
		conf = np.asarray(self.conf_m, dtype=float)
		row_sums = conf.sum(axis=1, keepdims=True)
		# A row that sums to zero would give 0/0 = NaN; leave such rows at 0.
		norm_conf_m = np.divide(conf, row_sums, out=np.zeros_like(conf), where=row_sums != 0)
		np.fill_diagonal(norm_conf_m, 0)
		plt.matshow(norm_conf_m, cmap=plt.cm.gray)


if __name__ == '__main__':
	print("整體準確性：{}".format(clf_correct(y_train_in, y_prd)))
	plt_confm = plot_conf_m(conf_m)
	# First figure: raw confusion matrix.
	plt_confm.plt_conf_m()
	plt.title("Focus on the correct prediction")
	# Second figure: row-normalised errors with the diagonal zeroed.
	plt_confm.plt_error_conf_m()
	plt.title("Focus on the error prediction")
	plt.show()

##  整體準確性：0.902795918367347

從下面兩個混淆矩陣中可以看出錯誤分類分佈比較平均，還待提高，所以增大C 進行重新擬合
在這裡插入圖片描述

4、模型修正及預測

1. 模型修正

if __name__ == '__main__' :
	# Refit the linear one-vs-rest SVM with a larger C (10 instead of 5).
	ova_svm_clf_fix = LinearSVC(loss='hinge', C=10, multi_class='ovr')
	ova_svm_clf_fix.fit(x_train_in, y_train_in)
	# Out-of-fold predictions from 3-fold cross-validation.
	y_prd_fix = cross_val_predict(ova_svm_clf_fix, x_train_in, y_train_in, cv=3)
	# Evaluation: confusion matrix of true labels vs. cross-validated predictions.
	conf_m_fix = confusion_matrix(y_true=y_train_in, y_pred=y_prd_fix)

	print("整體準確性：{}".format(clf_correct(y_train_in, y_prd_fix)))
	plt_confm_fix = plot_conf_m(conf_m_fix)
	plt_confm_fix.plt_conf_m()
	plt.title("Focus on the correct prediction")
	plt_confm_fix.plt_error_conf_m()
	plt.title("Focus on the error prediction")
	plt.show()

## 整體準確率0.91204

增大C 雖然略微提高了整體的準確率（約 0.903 → 0.912），但並沒有明顯好轉，可見線性核對該資料的分類效果有限，所以改用高斯核進行擬合。
在這裡插入圖片描述

from sklearn.svm import SVC
from sklearn.metrics import classification_report

if __name__ == '__main__': # ova
	ova_svm_clf_rbf = SVC(kernel='rbf',gamma = 'auto', C = 15, cache_size= 8000, decision_function_shape = 'ovr')
	ova_svm_clf_rbf.fit(x_train_in, y_train_in)
	# Predictions are made on the same data the model was fitted on, so the
	# figures below are training-set scores, not cross-validated ones.
	y_prd_rbf = ova_svm_clf_rbf.predict(x_train_in)
	print('整體準確率{}'.format(clf_correct(y_train_in, y_prd_rbf)))
	conf_m_rbf = confusion_matrix(y_true=y_train_in, y_pred=y_prd_rbf)
	plt_confm_rbf = plot_conf_m(conf_m_rbf)
	plt_confm_rbf.plt_conf_m()
	plt.title("Focus on the correct prediction")
	plt_confm_rbf.plt_error_conf_m()
	plt.title("Focus on the error prediction")
	plt.show()
	# Print the detailed per-class report.
	print(classification_report(y_train_in, y_prd_rbf))

"""
# 整體準確率：0.9831632653061224
             precision    recall  f1-score   support
        0.0       0.99      0.99      0.99      4832
        1.0       0.99      0.99      0.99      5514
        2.0       0.98      0.99      0.99      4893
        3.0       0.98      0.97      0.97      4999
        4.0       0.98      0.98      0.98      4777
        5.0       0.98      0.98      0.98      4419
        6.0       0.99      0.99      0.99      4813
        7.0       0.98      0.98      0.98      5105
        8.0       0.98      0.98      0.98      4777
        9.0       0.98      0.97      0.97      4871
avg / total       0.98      0.98      0.98     49000

"""

高斯核的準確率明顯提升了，但對 9 和 4、3 和 5 的識別還不是十分精確
在這裡插入圖片描述

2. 模型預測

if __name__ == '__main__' :
	# Evaluate on the held-out test split. The original referenced the
	# undefined names x_test / y_train; the test arrays produced by
	# train_test_data are x_test_in / y_test_in.
	# NOTE(review): this predicts with the linear model ova_svm_clf_fix, but
	# the accuracy recorded below (~0.96) looks closer to the RBF model's
	# results -- confirm whether ova_svm_clf_rbf was intended here.
	y_test_prd = ova_svm_clf_fix.predict(x_test_in)
	print("整體準確性：{}".format(clf_correct(y_test_in, y_test_prd)))
	# Build the confusion matrix from the test predictions instead of
	# re-plotting the training-set matrix conf_m.
	conf_m_test = confusion_matrix(y_test_in, y_test_prd)
	plt_confm_test = plot_conf_m(conf_m_test)
	plt_confm_test.plt_conf_m()
	plt.title("Focus on the correct prediction")
	plt_confm_test.plt_error_conf_m()
	plt.title("Focus on the error prediction")
	plt.show()
	# Print the detailed per-class report.
	print(classification_report(y_test_in, y_test_prd))

"""
# 整體準確性：0.9615238095238096
            precision    recall  f1-score   support
        0.0       0.97      0.99      0.98      2071
        1.0       0.97      0.98      0.98      2363
        2.0       0.96      0.97      0.96      2097
        3.0       0.95      0.95      0.95      2142
        4.0       0.96      0.96      0.96      2047
        5.0       0.96      0.94      0.95      1894
        6.0       0.97      0.98      0.97      2063
        7.0       0.97      0.96      0.97      2188
        8.0       0.95      0.95      0.95      2048
        9.0       0.94      0.94      0.94      2087
avg / total       0.96      0.96      0.96     21000

"""

在這裡插入圖片描述

Sklearn__SVM實現手寫數字識別

1、資料準備 from sklearn.model_selection import StratifiedShuffleSplit import pandas as pd import numpy

第二節，TensorFlow 使用前饋神經網絡實現手寫數字識別

com net config return pyplot dataset 運行算法但是一感知器感知器學習筆記：https://blog.csdn.net/liyuanbhu/article/details/51622695 感知器（Percep

第三節，TensorFlow 使用CNN實現手寫數字識別

啟用 out min 灰度 HA 打破 gre 大量 gray 上一節，我們已經講解了使用全連接網絡實現手寫數字識別，其正確率大概能達到98%，著一節我們使用卷積神經網絡來實現手寫數字識別，其準確率可以超過99%，程序主要包括以下幾塊內容 [1]: 導入數據，即測試集和

TensorFlow(九)：卷積神經網絡實現手寫數字識別以及可視化

writer orm true 交叉 lar write 執行 one 界面上代碼： import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data mnist =

TensorFlow(十二)：使用RNN實現手寫數字識別

rop mea pre rnn ext ini tro truncate tutorial 上代碼： import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data #

10 行程式碼，實現手寫數字識別

識別手寫的阿拉伯數字，對於人類來說十分簡單，但是對於程式來說還是有些複雜的。不過隨著機器學習技術的普及，使用10幾行程式碼，實現一個能夠識別手寫數字的程式，並不是一件難事。這是因為有太多的機器學習模型可以拿來直接用，比如tensorflow、caffe，在python下

深度學習2--tensorflow--Softmax迴歸實現手寫數字識別

使用Softmax迴歸來實現手寫數字識別，即給定一張手寫數字，判斷屬於0--9中哪一個數字。 1.LR邏輯迴歸先準備一下LR邏輯迴歸：廣義線性模型：實現x到y的非線性對映：在LR邏輯迴歸中取g函式：實現0--1對映輸出值為預測結果為1的概率

機器學習--k-近鄰演算法（kNN）實現手寫數字識別

這裡的手寫數字以0,1的形式儲存在文字檔案中，大小是32x32.目錄trainingDigits有1934個樣本。0-9每個數字大約有200個樣本，命名規則如下：下劃線前的數字代表是樣本0-9的

DCGAN實現手寫數字識別demo

論文解讀和原理介紹，在網上已經有大量文章，這裡就不在贅述。論文地址：Unsupervised Representations Learning With Deep Convolutional Generative Adversarial Networks 論文解讀：深度卷積對抗生成網路

教你用TensorFlow實現手寫數字識別

弱者用淚水安慰自己，強者用汗水磨練自己。這段時間因為專案中有一塊需要用到影象識別，最近就一直在煉丹，寶寶心裡苦，但是寶寶不說。。。能點開這篇文章的朋友估計也已經對TensorFlow有了一定了解，至少知道這是個什麼東西，我也就不過多介紹了。沒安裝TensorFlo

10 行程式碼實現手寫數字識別

可直接閱讀原文：http://c.raqsoft.com.cn/article/1540374496048?r=alice 識別手寫的阿拉伯數字，對於人類來說十分簡單，但是對於程式來說還是有些複雜的。不過隨著機器學習技術的普及，使用10幾行程式碼，實現一個能夠識別手

Python(TensorFlow框架)實現手寫數字識別系統

手寫數字識別演算法的設計與實現本文使用python基於TensorFlow設計手寫數字識別演算法，並程式設計實現GUI介面，構建手寫數字識別系統。這是本人的本科畢業論文課題，當然，這個也是機器學習的基本問題。本博文不會以論文的形式展現，而是以程式設計實戰

不用框架，Python實現手寫數字識別

有一句話說得好，要有造輪子的技術和用輪子的覺悟，今年來人工智慧火的不行，大家都爭相學習機器學習，作為學習大軍中的一員，我覺得最好的學習方法就是用python把機器學習演算法實現一遍，下面我介紹一下用邏輯迴歸實現手寫字型的識別。邏輯迴歸知識點回顧

各種機器學習方法（線性迴歸、支援向量機、決策樹、樸素貝葉斯、KNN演算法、邏輯迴歸）實現手寫數字識別並用準確率、召回率、F1進行評估

本文轉自：http://blog.csdn.net/net_wolf_007/article/details/51794254 前面兩章對資料進行了簡單的特徵提取及線性迴歸分析。識別率已經達到了85%，完成了數字識別的第一步：資料探測。這一章要做的就各

SVM實現手寫數字識別

SVM簡介知乎上的一個回答我認為是史上最NB最形象的SVM含義解釋，想看介紹戳這裡（裡面的第一個回答），再看看百科就能知道個大概了。開發環境 Windows10 + VS2013 + Qt580 + OpenCV300主要程式碼利用opencv-SVM演

學習KNN（三）KNN+HOG實現手寫數字識別

在學習KNN（二）KNN演算法手寫數字識別的OpenCV實現我們直接將畫素值作為特徵，實現了KNN演算法的手寫數字識別問題，並得到了較好的準確率，但是就像其他機器學習演算法一樣，KNN的物件同樣是特徵，所以我們可以用一種特徵提取演算法配合KNN實現手寫數字識

用python的numpy實現神經網路實現手寫數字識別

首先是讀取檔案，train-images-idx3-ubyte等四個檔案是mnist資料集裡的資料。放在MNIST資料夾裡。MNIST資料夾和這個.py檔案放在同一個資料夾裡。 import numpy as np import struct train_images

python tensorflow 基於cnn實現手寫數字識別

感覺剛才的程式碼不夠給力，所以再儲存一份基於cnn的手寫數字自識別的程式碼 # -*- coding: utf-8 -*- import tensorflow as tf from tensorflow.examples.tutorials.mnist

【深度學習】3：BP神經網路與MNIST資料集實現手寫數字識別

前言：這是一篇基於tensorflow框架，建立的只有一層隱藏層的BP神經網路，做的圖片識別，內容也比較簡單，全當是自己的學習筆記了。 –—-—-—-—-—-—-—-—-—-—-—-—–—-—-—-—-—-—-—-——-—-—-—-—-—-—-—-—-—-—-

MachineLearning— (KNN)k Nearest Neighbor實現手寫數字識別（三）

本篇博文主要結合前兩篇的knn演算法理論部分knn理論理解（一）和knn理論理解（二），做一個KNN的實現，主要是根據《機器學習實戰》這本書的內容，一個非常經典有趣的例子就是使用knn最近鄰演算法來實現對手寫數字的識別，下面將給出Python程式碼，儘量使用詳盡的解

Sklearn__SVM實現手寫數字識別

1、 資料準備

2、檢視資料及模型訓練

3、模型評估

4、模型修正及預測

1. 模型修正

2. 模型預測

相關推薦

1、資料準備