1. 程式人生 > >python 驗證碼識別示例(二) 複雜驗證碼識別

python 驗證碼識別示例(二) 複雜驗證碼識別

def code 分享 切割 edr convert pen https lis

   在這篇博文中手把手教你如何去分割驗證碼,然後進行識別。

一:下載驗證碼

    技術分享圖片

  驗證碼分析:圖片上有折線;驗證碼包含數字和大小寫英文字母,分類的時候需要更多的樣本;驗證碼的字母是彩色的,圖片上還有雪花等噪點,因此識別該驗證碼難度較大。

二:二值化和降噪:

  技術分享圖片

 

 三: 切割:

    技術分享圖片

四:分類:

    技術分享圖片

五: 測試識別率

  技術分享圖片 技術分享圖片 技術分享圖片   技術分享圖片   技術分享圖片

六:總結:

  綜合識別率在70%左右,對於這個識別率我覺得還是挺高的,因為這個驗證碼的識別難度還是很大

代碼:

一:下載圖片:

  

#-*-coding:utf-8-*-
import requests


def spider():
    """Download 100 captcha samples into ``./1__get_image/``.

    The same captcha URL is requested 100 times and each response body is
    written to ``./1__get_image/<i>.png`` for ``i`` in 1..100. The target
    directory must already exist.

    Raises:
        requests.RequestException: if a request times out, fails, or the
            server answers with an HTTP error status.
    """
    url = "https://www.epailive.com/basic/captcha?ran=0.22070346581876787"
    for i in range(1, 101):
        print("正在下載的張數是:", i)
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=10)
        # Do not save an HTTP error page as if it were a PNG sample.
        response.raise_for_status()
        with open("./1__get_image/{}.png".format(i), "wb") as f:
            f.write(response.content)


spider()

二:二值化和降噪:

  

# -*-coding:utf-8-*-
# coding:utf-8
import sys
import os

from PIL import Image, ImageDraw

# Binarised pixel map shared by every function below: (x, y) -> 0 or 1.
t2val = {}

# Relative positions of the 8 neighbours surrounding a pixel.
_NEIGHBOUR_OFFSETS = (
    (-1, -1), (-1, 0), (-1, 1),
    (0, -1), (0, 1),
    (1, -1), (1, 0), (1, 1),
)


def twoValue(image, G):
    """Binarise ``image`` into the module-level ``t2val`` map.

    Args:
        image: object exposing ``size`` and ``getpixel((x, y))``; the driver
            below passes a Pillow image converted to mode "L".
        G: integer binarisation threshold. Pixels strictly greater than
            ``G`` become 1, all others become 0.
    """
    width, height = image.size
    for y in range(height):
        for x in range(width):
            t2val[(x, y)] = 1 if image.getpixel((x, y)) > G else 0


def clearNoise(image, N, Z):
    """Remove isolated pixels from ``t2val`` in place.

    Each interior pixel is compared with its 8 neighbours; when fewer than
    ``N`` neighbours hold the same value, the pixel is treated as noise and
    set to 1. Border pixels are never re-evaluated. Updates are written
    straight back into ``t2val``, so later pixels in a pass see earlier
    corrections.

    Args:
        image: object exposing ``size`` (only the dimensions are used).
        N: integer noise threshold, intended range 0 < N < 8.
        Z: integer number of denoising passes.

    Returns:
        None. (The original header comment described a 0/1 status code,
        but no value is returned.)
    """
    width, height = image.size
    for _ in range(Z):
        t2val[(0, 0)] = 1
        t2val[(width - 1, height - 1)] = 1

        for x in range(1, width - 1):
            for y in range(1, height - 1):
                value = t2val[(x, y)]
                nearDots = sum(
                    1
                    for dx, dy in _NEIGHBOUR_OFFSETS
                    if t2val[(x + dx, y + dy)] == value
                )
                if nearDots < N:
                    t2val[(x, y)] = 1


def saveImage(filename, size):
    """Render ``t2val`` into a new 1-bit image of ``size`` and save it.

    Args:
        filename: output path handed to ``Image.save``.
        size: ``(width, height)``; ``t2val`` must contain every coordinate
            inside that rectangle.
    """
    image = Image.new("1", size)
    draw = ImageDraw.Draw(image)
    for x in range(size[0]):
        for y in range(size[1]):
            draw.point((x, y), t2val[(x, y)])
    image.save(filename)


# Binarise and denoise the 100 downloaded captchas.
for i in range(1, 101):
    path = "1__get_image/" + str(i) + ".png"
    image = Image.open(path)
    # "L" = 8-bit greyscale, so getpixel() yields one integer per pixel.
    image = image.convert("L")
    twoValue(image, 198)
    clearNoise(image, 3, 1)
    # NOTE(review): the 1-bit result is stored as JPEG, which is lossy;
    # confirm the next stage still finds pure 0-valued pixels.
    path1 = "2__erzhihua_jiangzao/" + str(i) + ".jpg"
    saveImage(path1, image.size)

三:切割驗證碼:

  

#-*-coding:utf-8-*-


from PIL import Image



def smartSliceImg(img, outDir, ii, count=4, p_w=3):
    """Cut ``img`` into ``count`` vertical strips and save each one.

    The image is first divided into ``count`` equal-width columns. Around
    each nominal cut position, the columns within ``p_w`` pixels are
    examined and the one containing the fewest 0-valued pixels is chosen as
    the actual cut, so the cut tends to fall between characters. On a tie
    the left-most candidate wins.

    Piece ``i`` is written to ``outDir + str(ii) + "_" + str(i) + ".png"``.

    :param img: image exposing ``size``, ``load()`` and ``crop(box)``
        (the caller in this file passes a Pillow image).
    :param outDir: output path prefix; it is concatenated directly with the
        file name, so it should end with a path separator.
    :param ii: index of the source image, used in the output file names.
    :param count: number of pieces (characters) in the image.
    :param p_w: how many pixels on each side of the nominal cut position
        are considered; must be at least 1.
    :return: None
    """
    w, h = img.size
    pixdata = img.load()
    eachWidth = w // count
    beforeX = 0
    for i in range(count):
        nextXOri = (i + 1) * eachWidth

        allBCount = []
        for x in range(nextXOri - p_w, nextXOri + p_w):
            # Clamp candidate columns to the image bounds.
            x = max(min(x, w - 1), 0)
            b_count = sum(1 for y in range(h) if pixdata[x, y] == 0)
            allBCount.append({"x_pos": x, "count": b_count})

        # min() returns the first minimal entry, matching a stable sort.
        nextX = min(allBCount, key=lambda e: e["count"])["x_pos"]
        box = (beforeX, 0, nextX, h)
        img.crop(box).save(outDir + str(ii) + "_" + str(i) + ".png")
        beforeX = nextX

# Slice each of the 100 binarised captchas into 4 single-character images.
for ii in range(1, 101):
    path = "2__erzhihua_jiangzao/" + str(ii) + ".jpg"
    img = Image.open(path)
    # Prefix for the output files; the trailing slash is required because
    # smartSliceImg concatenates it directly with the file name.
    outDir = "3__qiege/"
    smartSliceImg(img, outDir, ii, count=4, p_w=3)

四:訓練:

    

#-*-coding:utf-8-*-

import numpy as np
import os
import time

from PIL import Image
from sklearn.externals import joblib
from sklearn.neighbors import KNeighborsClassifier


def load_dataset():
    """Load the labelled training samples from ``fenlei/<label>/``.

    Every image in each label directory is converted to 8-bit greyscale and
    flattened to a vector of 25 * 30 values; the directory name is used as
    the sample's label.

    Returns:
        tuple: ``(X, y)`` where ``X`` is an array with one flattened image
        per row and ``y`` is the array of matching label strings.

    Raises:
        FileNotFoundError: if a label directory is missing.
        ValueError: if an image does not contain exactly 25 * 30 pixels.
    """
    X = []
    y = []
    # Same alphabet as check_everyone(); the original string had "V" in
    # place of "C", so class C was skipped and class V was loaded twice.
    for label in "23456789ABCDEFGHKMNPRSTUVWXYZ":
        target_path = "fenlei/" + label
        print(target_path)
        for title in os.listdir(target_path):
            sample = Image.open(os.path.join(target_path, title)).convert("L")
            pix = np.asarray(sample)
            X.append(pix.reshape(25 * 30))
            y.append(label)

    X = np.asarray(X)
    y = np.asarray(y)
    return X, y

def check_everyone(model):
    """Evaluate ``model`` on the labelled samples under ``part/<label>/``.

    Each image is converted to 8-bit greyscale and flattened to 25 * 30
    values, then classified with ``model.predict``. The predictions and the
    expected labels are printed, followed by the number of correct
    predictions and the accuracy ratio.

    Args:
        model: fitted classifier exposing ``predict(samples)``.

    Raises:
        FileNotFoundError: if a label directory is missing.
        ValueError: if an image does not contain exactly 25 * 30 pixels.
    """
    pre_list = []
    y_list = []
    for label in "23456789ABCDEFGHKMNPRSTUVWXYZ":
        part_path = "part/" + label
        for title in os.listdir(part_path):
            sample = Image.open(os.path.join(part_path, title)).convert("L")
            pre_list.append(np.asarray(sample).reshape(25 * 30))
            y_list.append(label)
    pre_list = np.asarray(pre_list)
    y_list = np.asarray(y_list)

    result_list = model.predict(pre_list)
    # Print the arrays once instead of once per sample.
    print(result_list, y_list)

    # Count matches with plain bool(); np.bool no longer exists in
    # NumPy 1.24+.
    acc = sum(1 for hit in result_list == y_list if bool(hit))
    print(acc, acc / len(result_list))


# Train a k-nearest-neighbours classifier with default settings on the
# labelled samples, persist it to "yipai.model", then report its accuracy.
# NOTE(review): `sklearn.externals.joblib` was removed in scikit-learn 0.23;
# on newer versions the import above must become `import joblib`.
X, y = load_dataset()
knn = KNeighborsClassifier()
knn.fit(X, y)
joblib.dump(knn, "yipai.model")
check_everyone(knn)

五:測試:

    

# -*- coding: utf-8 -*-

import numpy as np
from PIL import Image
from sklearn.externals import joblib
import os

# File names in the download directory with ".png" removed.
# NOTE(review): presumably the files were renamed to the captcha text so
# these act as ground-truth answers - confirm; only the commented-out
# debug print at the end of the script refers to this list.
target_path = "1__get_image/"
source_result = [title.replace(".png", "") for title in os.listdir(target_path)]


def predict(model):
    """Recognise the 100 sliced captchas stored under ``part1/``.

    For captcha ``q`` (1..100) the four pieces ``part1/<q>_<i>.png``
    (``i`` in 0..3) are loaded, flattened to 25 * 30 values and classified;
    the four predicted characters are concatenated in order.

    Args:
        model: fitted classifier exposing ``predict(samples)`` and
            returning one string label per sample.

    Returns:
        list: 100 strings, one recognised captcha text per source image.

    Raises:
        FileNotFoundError: if a piece image is missing.
        ValueError: if a piece does not contain exactly 25 * 30 pixels.
    """
    predict_result = []
    for q in range(1, 101):
        pre_list = []
        for i in range(0, 4):
            part_path = "part1/" + str(q) + "_" + str(i) + ".png"
            # Convert to greyscale exactly as the training loaders do, so
            # the feature vectors are comparable.
            pix = np.asarray(Image.open(part_path).convert("L"))
            pre_list.append(pix.reshape(25 * 30))
        pre_list = np.asarray(pre_list)

        result_list = model.predict(pre_list)
        print(result_list, q)

        predict_result.append("".join(str(ch) for ch in result_list))

    return predict_result


# Load the classifier saved by the training script and run it on all pieces.
# NOTE: joblib files are pickle-based; only load model files you trust.
model = joblib.load("yipai.model")
predict_result = predict(model)
# print(source_result)
# print(predict_result)

python 驗證碼識別示例(二) 複雜驗證碼識別