1. 程式人生 > >TensorFlow實現驗證碼識別(十七)

TensorFlow實現驗證碼識別(十七)

TensorFlow實現驗證碼的識別

使用的生成驗證碼庫是captcha。

首先是驗證碼的生成,先生成一個驗證碼。

程式碼:

import random
import tensorflow as tf
from captcha.image import ImageCaptcha
import matplotlib.pyplot as plt
import PIL.Image
import numpy as np

# Character set used for the captcha text.  Only digits are enabled; the
# letter sets below are left disabled to keep training time short.
number = [0,1,2,3,4,5,6,7,8,9]
#alphabet = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
#ALPHABET = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']


def random_alphabet(chat_set = number , chat_size = 4):
    """Return a list of `chat_size` items picked at random from `chat_set`.

    Each item is chosen independently with random.choice, so the same
    character may appear more than once.
    """
    return [random.choice(chat_set) for _ in range(chat_size)]


def get_text_image():
    """Generate one random captcha.

    Returns:
        A tuple (text, captcha_image): the captcha text as a string and the
        rendered image converted to a numpy array.
    """
    image = ImageCaptcha()
    # random_alphabet returns individual characters (ints by default), so
    # convert each one to str before joining them into the captcha text.
    text = ''.join(str(char) for char in random_alphabet())
    captcha = image.generate(text)
    captcha_image = np.array(PIL.Image.open(captcha))
    return text, captcha_image


def show():
    """Generate one captcha and display it in a matplotlib window."""
    _, image = get_text_image()
    f = plt.figure()
    f.add_subplot(111)
    plt.imshow(image)
    plt.show()

這幾個就是生成驗證碼的函式,放在image_Create.py檔案下。

解釋下程式碼吧

首先要得到驗證碼上的數字:

因為這個如果識別的數字或字母特別多,要訓練很久,於是就只是進行數字的驗證碼識別。

def random_alphabet(chat_set = number , chat_size = 4):
    """Draw `chat_size` random items from `chat_set` and return them as a list.

    Every draw is an independent random.choice call, so repeated characters
    are possible.
    """
    return [random.choice(chat_set) for _ in range(chat_size)]

random.choice每次會從chat_set中隨機選出一個字元;迴圈執行chat_size(預設為4)次,把每次選到的數字依次加入列表,最後返回這個列表即可。

def get_text_image():
    """Create a random captcha and return (text, image array).

    The characters from random_alphabet() are stringified and joined into
    the captcha text; the rendered image is opened with PIL and converted
    to a numpy array.
    """
    generator = ImageCaptcha()
    chars = random_alphabet()
    text = ''.join(str(char) for char in chars)
    rendered = generator.generate(text)
    captcha_image = np.array(PIL.Image.open(rendered))
    return text, captcha_image

這個函式就是返回一個驗證碼數字和一個圖片,驗證碼的圖片。

captcha = image.generate(text)
這個就是生成驗證碼的圖片了。後面就是一些固定操作了。
def show():
    """Render a freshly generated captcha image in a matplotlib figure."""
    _, picture = get_text_image()
    figure = plt.figure()
    figure.add_subplot(111)
    plt.imshow(picture)
    plt.show()

這個就是一個顯示圖片的函式。前面的get_text_image裡先用一個迴圈把list裡面的數字都變成字串,然後把整個list拼接成一個string;這裡則是用imshow把圖片放進去,最後用show顯示。


接下來就是進行一個訓練了。

import tensorflow as tf
import numpy as np
import image_Create
import prev
import matplotlib.pyplot as plt

def train_cell(x , w_alpha = 0.01 , b_alpha = 0.1):
    """Build the CNN that maps flattened captcha images to per-character logits.

    Three conv -> ReLU -> 2x2 max-pool -> dropout stages are followed by a
    1024-unit fully connected layer and a linear output layer.  The function
    reads the module-level names image_height, image_width, keep_cell,
    Max_text and Char_size, which must be defined before it is called.

    Args:
        x: Tensor of flattened images; reshaped here to
            [-1, image_height, image_width, 1].
        w_alpha: Scale applied to the random-normal initial weights.
        b_alpha: Scale applied to the random-normal initial biases.

    Returns:
        Logits tensor with Max_text * Char_size values per image.
    """
    net = tf.reshape(x , shape=[-1 , image_height , image_width , 1])

    # Weight then bias is created for each stage, in the same order as the
    # earlier hand-unrolled version, so a checkpoint saved by that version
    # should still restore against this graph.
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 64)):
        w_conv = tf.Variable(w_alpha * tf.random_normal([3 , 3 , in_channels , out_channels]))
        b_conv = tf.Variable(b_alpha * tf.random_normal([out_channels]))
        net = tf.nn.conv2d(net , w_conv , strides=[1 , 1 , 1 , 1] , padding='SAME')
        net = tf.nn.relu(tf.nn.bias_add(net , b_conv))
        net = tf.nn.max_pool(net , ksize=[1 , 2 , 2 , 1] , strides=[1 , 2 , 2 , 1] , padding='SAME')
        net = tf.nn.dropout(net , keep_cell)

    # Derive the flattened feature size from the pooled tensor's static shape
    # instead of hard-coding 8 * 20 * 64 (the value for a 60x160 input).
    height, width, channels = net.get_shape().as_list()[1:]
    flat_dim = height * width * channels

    w_d = tf.Variable(w_alpha * tf.random_normal([flat_dim , 1024]))
    b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(net, [-1, flat_dim])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_cell)

    w_out = tf.Variable(w_alpha*tf.random_normal([1024, Max_text * Char_size]))
    b_out = tf.Variable(b_alpha*tf.random_normal([Max_text * Char_size]))
    return tf.add(tf.matmul(dense, w_out), b_out)
def train_CNN(x , y):
    """Train the captcha CNN and save a checkpoint when training stops.

    Builds the network with train_cell, minimises the mean sigmoid
    cross-entropy with Adam, and reports the per-character accuracy
    (fraction of character positions whose argmax matches the label).
    Reads the module-level names Max_text, Char_size and keep_cell.

    Args:
        x: Placeholder for the flattened input images.
        y: Placeholder for the label vectors (Max_text * Char_size wide).
    """
    output = train_cell(x)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output , labels=y))
    optimer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    predict = tf.reshape(output, [-1, Max_text, Char_size])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(y, [-1, Max_text , Char_size]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            _, loss_ = sess.run([optimer, loss], feed_dict={x: batch_x, y: batch_y, keep_cell: 0.75})
            print('第',step,'步','損失', loss_)

            # Every 10 steps, measure accuracy on a fresh batch with dropout
            # disabled (keep probability 1).
            if step % 10 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_cell: 1.})
                print('第',step,'步', '準確',acc)
                # Stop after more than 1000 steps.  Because this check only
                # runs on multiples of 10, the checkpoint is written at step
                # 1010 (file suffix "-1010").
                if step > 1000:
                    saver.save(sess, "./model/crack_capcha.model", global_step=step)
                    break
            step += 1
def get_next_batch(batch_size = 128):
    """Generate a batch of freshly rendered captchas for training or evaluation.

    Reads the module-level names image_height, image_width, Max_text and
    Char_size.

    Args:
        batch_size: Number of captchas to generate.

    Returns:
        A tuple (batch_x, batch_y): batch_x has one flattened, scaled
        grayscale image per row; batch_y has the matching label vector
        produced by prev.text2vec per row.
    """
    batch_x = np.zeros([batch_size , image_height * image_width])
    batch_y = np.zeros([batch_size , Max_text * Char_size])
    for i in range(batch_size):
        text , image = image_Create.get_text_image()
        image = prev.convert_to_gray(image)
        # Divide by 255 so training uses the same scaling as the prediction
        # path; the previous divisor 225 looked like a typo.  Models trained
        # with the old scaling should be retrained.
        batch_x[i ,:] = image.flatten() / 255
        batch_y[i ,:] = prev.text2vec(text,Max_text , Char_size)
    return batch_x , batch_y
def crack_captcha(captcha_image, model_path="./model/crack_capcha.model-1010"):
    """Predict the characters of one captcha image using a saved checkpoint.

    Rebuilds the network with train_cell on the module-level placeholder x,
    restores the weights from `model_path`, and runs a single prediction.
    Reads the module-level names x, keep_cell, Max_text and Char_size.

    Args:
        captcha_image: One flattened, scaled grayscale image.
        model_path: Checkpoint to restore; defaults to the file written by
            train_CNN at step 1010.

    Returns:
        A list with the predicted class index for each character position.
    """
    output = train_cell(x)
    predict = tf.argmax(tf.reshape(output, [-1, Max_text, Char_size]), 2)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, model_path)
        # Feed a batch of one image; keep probability 1 disables dropout.
        text_list = sess.run(predict, feed_dict={x: [captcha_image], keep_cell: 1})
    return text_list[0].tolist()
if __name__ == '__main__':
    # Mode switch: 0 trains a model, 1 restores the saved checkpoint and
    # predicts a single captcha.
    train = 1

    # The names assigned below stay at module level on purpose: train_cell,
    # train_CNN, get_next_batch and crack_captcha read them as globals.
    if train == 0:
        number = ['0','1','2','3','4','5','6','7','8','9']
        #alphabet = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
        #ALPHABET = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
        # Generate one sample only to learn the captcha text length.
        text, image = image_Create.get_text_image()
        image_height , image_width = (60,160)
        char_set = number
        Max_text = len(text)
        Char_size = len(char_set)
        x = tf.placeholder(tf.float32 , [None , image_height*image_width])
        y = tf.placeholder(tf.float32 , [None , Max_text*Char_size])
        keep_cell = tf.placeholder(tf.float32 )  # dropout keep probability
        train_CNN(x , y)

    if train == 1:
        number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        image_height = 60
        image_width = 160
        char_set = number
        Char_size = len(char_set)

        text, image = image_Create.get_text_image()

        # Show the captcha with its true text drawn near the top-left corner.
        f = plt.figure()
        ax = f.add_subplot(111)
        ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
        plt.imshow(image)

        plt.show()

        Max_text = len(text)
        # Flatten and scale the image before feeding it to the network.
        image = prev.convert_to_gray(image)
        image = image.flatten() / 255

        x = tf.placeholder(tf.float32, [None, image_height * image_width])
        y = tf.placeholder(tf.float32, [None, Max_text * Char_size])
        keep_cell = tf.placeholder(tf.float32)  # dropout keep probability

        predict_text = crack_captcha(image)
        print("正確: {}  預測: {}".format(text, predict_text))

首先要定義網路。

def train_cell(x , w_alpha = 0.01 , b_alpha = 0.1):
    """Build the CNN that maps flattened captcha images to per-character logits.

    Three conv -> ReLU -> 2x2 max-pool -> dropout stages are followed by a
    1024-unit fully connected layer and a linear output layer.  The function
    reads the module-level names image_height, image_width, keep_cell,
    Max_text and Char_size, which must be defined before it is called.

    Args:
        x: Tensor of flattened images; reshaped here to
            [-1, image_height, image_width, 1].
        w_alpha: Scale applied to the random-normal initial weights.
        b_alpha: Scale applied to the random-normal initial biases.

    Returns:
        Logits tensor with Max_text * Char_size values per image.
    """
    net = tf.reshape(x , shape=[-1 , image_height , image_width , 1])

    # Weight then bias is created for each stage, in the same order as the
    # earlier hand-unrolled version, so a checkpoint saved by that version
    # should still restore against this graph.
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 64)):
        w_conv = tf.Variable(w_alpha * tf.random_normal([3 , 3 , in_channels , out_channels]))
        b_conv = tf.Variable(b_alpha * tf.random_normal([out_channels]))
        net = tf.nn.conv2d(net , w_conv , strides=[1 , 1 , 1 , 1] , padding='SAME')
        net = tf.nn.relu(tf.nn.bias_add(net , b_conv))
        net = tf.nn.max_pool(net , ksize=[1 , 2 , 2 , 1] , strides=[1 , 2 , 2 , 1] , padding='SAME')
        net = tf.nn.dropout(net , keep_cell)

    # Derive the flattened feature size from the pooled tensor's static shape
    # instead of hard-coding 8 * 20 * 64 (the value for a 60x160 input).
    height, width, channels = net.get_shape().as_list()[1:]
    flat_dim = height * width * channels

    w_d = tf.Variable(w_alpha * tf.random_normal([flat_dim , 1024]))
    b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(net, [-1, flat_dim])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_cell)

    w_out = tf.Variable(w_alpha*tf.random_normal([1024, Max_text * Char_size]))
    b_out = tf.Variable(b_alpha*tf.random_normal([Max_text * Char_size]))
    return tf.add(tf.matmul(dense, w_out), b_out)

之前都講過了,應該都有b數的。三層卷積網路,兩層全連線網路。

之後就是一個損失函數了:

def train_CNN(x , y):
    """Train the captcha CNN and save a checkpoint when training stops.

    Builds the network with train_cell, minimises the mean sigmoid
    cross-entropy with Adam, and reports the per-character accuracy
    (fraction of character positions whose argmax matches the label).
    Reads the module-level names Max_text, Char_size and keep_cell.

    Args:
        x: Placeholder for the flattened input images.
        y: Placeholder for the label vectors (Max_text * Char_size wide).
    """
    output = train_cell(x)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output , labels=y))
    optimer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    predict = tf.reshape(output, [-1, Max_text, Char_size])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(y, [-1, Max_text , Char_size]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            _, loss_ = sess.run([optimer, loss], feed_dict={x: batch_x, y: batch_y, keep_cell: 0.75})
            print('第',step,'步','損失', loss_)

            # Every 10 steps, measure accuracy on a fresh batch with dropout
            # disabled (keep probability 1).
            if step % 10 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_cell: 1.})
                print('第',step,'步', '準確',acc)
                # Stop after more than 1000 steps.  Because this check only
                # runs on multiples of 10, the checkpoint is written at step
                # 1010 (file suffix "-1010").
                if step > 1000:
                    saver.save(sess, "./model/crack_capcha.model", global_step=step)
                    break
            step += 1

可以看到使用的優化器並不是梯度下降,但是使用梯度下降也可以。。。但是要把學習率設定的很小,之前都試過,這個是最快的了。儲存模型。。。應該懂的。

這裡要用到幾個圖片的預處理:

import numpy as np
import tensorflow as tf
def convert_to_gray(image):
    """Collapse a multi-channel image to grayscale by averaging the last axis.

    Arrays with two or fewer dimensions are returned unchanged.
    """
    if len(image.shape) <= 2:
        return image
    return np.mean(image, axis=-1)

def text2vec(text , max_text , char_size):
    """Encode a digit string as a flat one-hot label vector.

    Position i of the text owns the slice [i * char_size, (i + 1) * char_size)
    of the result, and the entry at offset int(text[i]) inside that slice is
    set to 1.

    Args:
        text: The captcha text; every character must be convertible by int().
        max_text: Number of character positions the vector has room for.
        char_size: Number of classes per position.

    Returns:
        A numpy array of length max_text * char_size.
    """
    vector = np.zeros(max_text*char_size)
    for i, c in enumerate(text):
        vector[i * char_size + int(c)] = 1
    return vector


np.mean(image,-1)是為了轉換成灰度圖

這兩個函式會放在prev.py下。

def get_next_batch(batch_size = 128):
    """Generate a batch of freshly rendered captchas for training or evaluation.

    Reads the module-level names image_height, image_width, Max_text and
    Char_size.

    Args:
        batch_size: Number of captchas to generate.

    Returns:
        A tuple (batch_x, batch_y): batch_x has one flattened, scaled
        grayscale image per row; batch_y has the matching label vector
        produced by prev.text2vec per row.
    """
    batch_x = np.zeros([batch_size , image_height * image_width])
    batch_y = np.zeros([batch_size , Max_text * Char_size])
    for i in range(batch_size):
        text , image = image_Create.get_text_image()
        image = prev.convert_to_gray(image)
        # Divide by 255 so training uses the same scaling as the prediction
        # path; the previous divisor 225 looked like a typo.  Models trained
        # with the old scaling should be retrained.
        batch_x[i ,:] = image.flatten() / 255
        batch_y[i ,:] = prev.text2vec(text,Max_text , Char_size)
    return batch_x , batch_y

這個是讀取模型。1010就是訓練了1010次後得到的模型。

def crack_captcha(captcha_image, model_path="./model/crack_capcha.model-1010"):
    """Predict the characters of one captcha image using a saved checkpoint.

    Rebuilds the network with train_cell on the module-level placeholder x,
    restores the weights from `model_path`, and runs a single prediction.
    Reads the module-level names x, keep_cell, Max_text and Char_size.

    Args:
        captcha_image: One flattened, scaled grayscale image.
        model_path: Checkpoint to restore; defaults to the file written by
            train_CNN at step 1010.

    Returns:
        A list with the predicted class index for each character position.
    """
    output = train_cell(x)
    predict = tf.argmax(tf.reshape(output, [-1, Max_text, Char_size]), 2)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, model_path)
        # Feed a batch of one image; keep probability 1 disables dropout.
        text_list = sess.run(predict, feed_dict={x: [captcha_image], keep_cell: 1})
    return text_list[0].tolist()

接下來就是運行了:

if __name__ == '__main__':
    # Mode switch: 0 trains a model, 1 restores the saved checkpoint and
    # predicts a single captcha.
    train = 1

    # The names assigned below stay at module level on purpose: train_cell,
    # train_CNN, get_next_batch and crack_captcha read them as globals.
    if train == 0:
        number = ['0','1','2','3','4','5','6','7','8','9']
        #alphabet = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
        #ALPHABET = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
        # Generate one sample only to learn the captcha text length.
        text, image = image_Create.get_text_image()
        image_height , image_width = (60,160)
        char_set = number
        Max_text = len(text)
        Char_size = len(char_set)
        x = tf.placeholder(tf.float32 , [None , image_height*image_width])
        y = tf.placeholder(tf.float32 , [None , Max_text*Char_size])
        keep_cell = tf.placeholder(tf.float32 )  # dropout keep probability
        train_CNN(x , y)

    if train == 1:
        number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        image_height = 60
        image_width = 160
        char_set = number
        Char_size = len(char_set)

        text, image = image_Create.get_text_image()

        # Show the captcha with its true text drawn near the top-left corner.
        f = plt.figure()
        ax = f.add_subplot(111)
        ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
        plt.imshow(image)

        plt.show()

        Max_text = len(text)
        # Flatten and scale the image before feeding it to the network.
        image = prev.convert_to_gray(image)
        image = image.flatten() / 255

        x = tf.placeholder(tf.float32, [None, image_height * image_width])
        y = tf.placeholder(tf.float32, [None, Max_text * Char_size])
        keep_cell = tf.placeholder(tf.float32)  # dropout keep probability

        predict_text = crack_captcha(image)
        print("正確: {}  預測: {}".format(text, predict_text))

一開始訓練很慢的,訓練1010次後得到的模型的準確率是0.72。


還算過得去吧。。。。。

def train_cell(x , w_alpha = 0.01 , b_alpha = 0.1):
    """Build the CNN that maps flattened captcha images to per-character logits.

    Three conv -> ReLU -> 2x2 max-pool -> dropout stages are followed by a
    1024-unit fully connected layer and a linear output layer.  The function
    reads the module-level names image_height, image_width, keep_cell,
    Max_text and Char_size, which must be defined before it is called.

    Args:
        x: Tensor of flattened images; reshaped here to
            [-1, image_height, image_width, 1].
        w_alpha: Scale applied to the random-normal initial weights.
        b_alpha: Scale applied to the random-normal initial biases.

    Returns:
        Logits tensor with Max_text * Char_size values per image.
    """
    net = tf.reshape(x , shape=[-1 , image_height , image_width , 1])

    # Weight then bias is created for each stage, in the same order as the
    # earlier hand-unrolled version, so a checkpoint saved by that version
    # should still restore against this graph.
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 64)):
        w_conv = tf.Variable(w_alpha * tf.random_normal([3 , 3 , in_channels , out_channels]))
        b_conv = tf.Variable(b_alpha * tf.random_normal([out_channels]))
        net = tf.nn.conv2d(net , w_conv , strides=[1 , 1 , 1 , 1] , padding='SAME')
        net = tf.nn.relu(tf.nn.bias_add(net , b_conv))
        net = tf.nn.max_pool(net , ksize=[1 , 2 , 2 , 1] , strides=[1 , 2 , 2 , 1] , padding='SAME')
        net = tf.nn.dropout(net , keep_cell)

    # Derive the flattened feature size from the pooled tensor's static shape
    # instead of hard-coding 8 * 20 * 64 (the value for a 60x160 input).
    height, width, channels = net.get_shape().as_list()[1:]
    flat_dim = height * width * channels

    w_d = tf.Variable(w_alpha * tf.random_normal([flat_dim , 1024]))
    b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(net, [-1, flat_dim])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_cell)

    w_out = tf.Variable(w_alpha*tf.random_normal([1024, Max_text * Char_size]))
    b_out = tf.Variable(b_alpha*tf.random_normal([Max_text * Char_size]))
    return tf.add(tf.matmul(dense, w_out), b_out)
def train_CNN(x , y):
    """Train the captcha CNN and save a checkpoint when training stops.

    Builds the network with train_cell, minimises the mean sigmoid
    cross-entropy with Adam, and reports the per-character accuracy
    (fraction of character positions whose argmax matches the label).
    Reads the module-level names Max_text, Char_size and keep_cell.

    Args:
        x: Placeholder for the flattened input images.
        y: Placeholder for the label vectors (Max_text * Char_size wide).
    """
    output = train_cell(x)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output , labels=y))
    optimer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    predict = tf.reshape(output, [-1, Max_text, Char_size])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(y, [-1, Max_text , Char_size]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            _, loss_ = sess.run([optimer, loss], feed_dict={x: batch_x, y: batch_y, keep_cell: 0.75})
            print('第',step,'步','損失', loss_)

            # Every 10 steps, measure accuracy on a fresh batch with dropout
            # disabled (keep probability 1).
            if step % 10 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_cell: 1.})
                print('第',step,'步', '準確',acc)
                # Stop after more than 1000 steps.  Because this check only
                # runs on multiples of 10, the checkpoint is written at step
                # 1010 (file suffix "-1010").
                if step > 1000:
                    saver.save(sess, "./model/crack_capcha.model", global_step=step)
                    break
            step += 1