1. 程式人生 > KNN算法實現手寫數字

KNN算法實現手寫數字識別

訓練 readline .cn erro code spa https tor asp

from numpy import *
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify; a 1-D sequence or a ``(1, n)`` array
            that broadcasts against the rows of ``dataSet``.
        dataSet: 2-D array with one training sample per row.
        labels: Sequence of labels; ``labels[i]`` belongs to ``dataSet[i]``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training samples are clamped to use every sample.

    Returns:
        The label with the most votes among the ``k`` nearest samples. On a
        tie, the label whose first vote came from the closest sample wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    dataSet = asarray(dataSet)
    if dataSet.shape[0] == 0:
        raise ValueError("dataSet must contain at least one sample")

    # Broadcasting subtracts inX from every row, replacing the explicit tile().
    diffMat = asarray(inX) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest samples; slicing clamps k to the
    # number of available samples instead of raising IndexError.
    classCount = {}
    for sampleIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[sampleIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # sorted() is stable even with reverse=True, so ties keep insertion
    # order, i.e. the label first seen among the nearest neighbours.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
def img2Vector(filename):
    """Read a 32x32 text image of digit characters into a ``(1, 1024)`` vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored row by row.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(1, 1024)`` array where element ``32 * i + j`` holds the value of
        character ``j`` on line ``i``.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even if a line is malformed.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect


def _classNumFromFileName(fileNameStr):
    """Return the integer before the first ``_`` in the file's base name."""
    fileStr = fileNameStr.split('.')[0]
    return int(fileStr.split('_')[0])


def handwritingClassTest():
    """Classify every file in ``testDigits`` using the ``trainingDigits`` set.

    Each training file is loaded into one row of the training matrix, with
    its label parsed from the file name. Each test file is then classified
    with ``classify0`` (k = 3), the prediction is printed next to the label
    parsed from its file name, and the error count and error rate are
    printed at the end.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        hwLabels.append(_classNumFromFileName(fileNameStr))
        trainingMat[i, :] = img2Vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = _classNumFromFileName(fileNameStr)
        vectorUnderTest = img2Vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with:%d,the real answer is :%d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1
    print("the total number of errors is :%d" % errorCount)
    print("the total error rate is: %f" % (errorCount / float(mTest)))


# Run the evaluation only when executed as a script, not on import.
if __name__ == "__main__":
    handwritingClassTest()

測試集+訓練集數據地址:https://i.cnblogs.com/Files.aspx

knn.rar

KNN算法實現手寫數字