1. 程式人生 > >tensorflow入門 數字手寫體識別python實現

tensorflow入門 數字手寫體識別python實現

第一部分:訓練模型

# Part one, step one: a single-layer softmax regression baseline on MNIST.
# Labels are loaded one-hot encoded (one_hot=True).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

import tensorflow as tf

# An InteractiveSession lets the .run()/.eval() calls below work without an
# explicit session argument.
sess = tf.InteractiveSession()

# x receives batches of 784-value input rows, y_ the matching 10-value one-hot labels.
x = tf.placeholder(tf.float32, shape=[None, 784])      # placeholder( dtype, shape=None, name=None )
y_ = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784,10]))        # tf.Variable(, name=, dtype=None, trainable=True)
b = tf.Variable(tf.zeros([10]))            # creates a new variable holding the given initial value
sess.run(tf.global_variables_initializer())

# Logits of the linear model; tf.matmul() is matrix/tensor multiplication.
y = tf.matmul(x,W) + b

# Softmax cross-entropy against the labels; tf.reduce_mean() averages over all elements.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

# Gradient-descent optimiser with learning rate 0.1 that minimises the loss.
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

for _ in range(5000):          # 5000 training iterations, 100 samples per batch
  batch = mnist.train.next_batch(100)
  train_step.run(feed_dict={x: batch[0], y_: batch[1]})
# tf.equal(A, B) compares element-wise: True where the predicted class
# (argmax of the logits) equals the labelled class, False otherwise.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

# cast(x, dtype, name=None) converts the booleans to floats, so their mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Accuracy of the baseline on the MNIST test split.
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

def weight_variable(shape):
  """Return a new tf.Variable of the given shape for use as a weight tensor.

  Initial values are drawn from a truncated normal distribution with a
  standard deviation of 0.1; the small random noise breaks the symmetry
  between units.
  """
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
  """Return a new tf.Variable of the given shape for use as a bias tensor.

  Every element starts at 0.1: the layers use ReLU, and a small positive
  bias helps to avoid dead units.
  """
  return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
  """Apply the 2-D convolution kernel W to x.

  The kernel moves with a stride of 1 in every dimension and 'SAME'
  padding is used for the borders, so the output keeps the spatial size
  of the input.
  """
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
  """Apply 2x2 max pooling to x.

  Each 2x2 block is reduced to its largest value (the most salient
  feature); the window moves 2 positions horizontally and vertically,
  with 'SAME' padding.
  """
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Part one, step two: build the convolutional network on top of the same
# x / y_ placeholders used by the softmax baseline.

# Reshape the flat input to 28x28 images: -1 leaves the number of samples
# open, the trailing 1 is the number of colour channels (greyscale).
x_image = tf.reshape(x, [-1,28,28,1])

# First convolution layer: 5x5 kernels, 1 input channel, 32 different kernels.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)       # relu() is the non-linear activation
h_pool1 = max_pool_2x2(h_conv1)         # pooling

# Second convolution layer: 32 input channels, 64 kernels.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# After the two pooling steps the image has gone from 28x28 to 7x7, so each
# sample holds 7*7*64 = 3136 values.

# Fully connected layer with 1024 units.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# Dropout needs a keep probability; keep_prob is a placeholder, fed at run
# time just like the input.
keep_prob = tf.placeholder(tf.float32)
# Dropout against over-fitting (test loss noticeably larger than training loss).
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Output layer: 10 logits, one per digit class.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# The names cross_entropy, train_step, correct_prediction and accuracy are
# rebound here from the softmax baseline to the CNN.
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Part one, step three: train the CNN and write the checkpoint that the
# recognition script restores.
saver = tf.train.Saver()  # defaults to saving all variables

# Re-running the initialiser also resets the variables of the softmax
# baseline trained earlier.
sess.run(tf.global_variables_initializer())
for i in range(20000):
  batch = mnist.train.next_batch(50)
  if i%100 == 0:
    # Every 100 steps report the accuracy on the current batch. keep_prob is
    # 1.0 here so that all activations are kept; dropout is only wanted
    # while training.
    train_accuracy = accuracy.eval(feed_dict={
        x:batch[0], y_: batch[1], keep_prob: 1.0})
    print("step %d, training accuracy %g"%(i, train_accuracy))

  # One optimiser step with half of the fully connected activations dropped.
  train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Save the model parameters; change the path to your own location.
save_path = saver.save(sess,'C:/Users/Mobile Workstation01/PycharmProjects/number recognition')
sess.close()

第二部分:數字識別

from skimage import io,transform
import numpy as np
from PIL import Image
import tensorflow as tf

################ picture preprocessing #############
# Load the picture to recognise (set the path to your own image), scale it
# to 100x100 and store the intermediate result.
img = io.imread('D:/PICTURE/ranqibiao-Results/4.png')
img = transform.resize(img,(100,100))
io.imsave('D:/PICTURE/num.png',img)
# Convert the RGB picture to a binary (black/white) image.
img = Image.open('D:/PICTURE/num.png')
img = img.convert('L')   # to greyscale
img = img.convert('1')   # to a two-level (binary) image
img.save('D:/PICTURE/b.png')
# Target size used when the cropped digit is resized later on.
w=28
h=28
# Standard size (N is not referenced by the code shown here).
N = 28
# Grey threshold: pixel values below it are treated as part of the digit
# by JudgeEdge.
color = 100/255
# Re-read the binary image as an array for cropping.
img = io.imread('D:/PICTURE/b.png')
def CutPicture(img, margin=6):
    """Crop ``img`` to the bounding box of its dark pixels plus a margin.

    The first/last row and column that contain ink are located with
    ``JudgeEdge``; the box is then widened by ``margin`` pixels on every
    side.

    Args:
        img: 2-D image array (rows x columns).
        margin: Number of extra pixels kept around the digit on each side.

    Returns:
        The cropped array. When ``JudgeEdge`` finds no ink at all, ``img``
        is returned unchanged.
    """
    rows = len(img)          # number of rows
    cols = len(img[0, :])    # number of columns
    top, bottom = JudgeEdge(img, rows, 0, [-1, -1])
    left, right = JudgeEdge(img, cols, 1, [-1, -1])

    # -1 means "edge not found": a blank picture has nothing to crop to, and
    # slicing with the sentinel values would return a meaningless region.
    if -1 in (top, bottom, left, right):
        return img

    # Clamp the lower bounds at 0. A negative start index would be read as
    # "counted from the end" and produce an empty or wrong crop whenever the
    # digit lies closer than ``margin`` pixels to the top or left border.
    # Upper bounds beyond the array size are already handled by slicing.
    top = max(top - margin, 0)
    left = max(left - margin, 0)
    return img[top:bottom + margin + 1, left:right + margin + 1]

def JudgeEdge(img, length, flag, size, threshold=None):
    """Find the first and last row (or column) of ``img`` that contains ink.

    The image is scanned from both ends at once; a row/column counts as
    containing ink when at least one of its pixels is below the threshold.

    Args:
        img: 2-D image array.
        length: Number of rows (``flag == 0``) or columns (otherwise) to scan.
        flag: 0 to scan rows, any other value to scan columns.
        size: Two-element list ``[first, last]``; an entry of -1 means "not
            found yet". The list is updated in place.
        threshold: Grey level below which a pixel is ink. Defaults to the
            module-level ``color`` value.

    Returns:
        The same ``size`` list, holding the indices of the first and last
        row/column with ink, or -1 where none was found.
    """
    if threshold is None:
        threshold = color
    for i in range(length):
        mirrored = length - 1 - i
        # Forward and backward candidates for this step of the scan.
        if flag == 0:
            front, back = img[i, :], img[mirrored, :]
        else:
            front, back = img[:, i], img[:, mirrored]
        # Test the pixels directly. Using the row mask as an index into the
        # image only works when the image is square, and copies rows just
        # to count them.
        if size[0] == -1 and (front < threshold).any():
            size[0] = i
        if size[1] == -1 and (back < threshold).any():
            size[1] = mirrored
        # Both edges are known: stop scanning.
        if size[0] != -1 and size[1] != -1:
            break
    return size
# Crop the picture to the digit and store the result.
img = CutPicture(img)
io.imsave('D:/PICTURE/c.png',img)
# Stretch the cropped picture to w x h and store it; this file is the one
# read by imageprepare().
img = transform.resize(img,(w,h))
io.imsave('D:/PICTURE/d.png',img)

################ picture recognition #############
def imageprepare(file_name='D:/PICTURE/d.png'):
    """Load a digit picture and flatten it into a list of 0/1 ink flags.

    Args:
        file_name: Path of the picture to load (use your own image path).
            Defaults to the file written by the preprocessing step.

    Returns:
        A flat list with one entry per pixel, in ``Image.getdata()`` order:
        0 for a pure-white pixel, 1 for every darker pixel.
    """
    # in terminal 'mogrify -format png *.jpg' convert jpg to png
    with Image.open(file_name) as im:
        if im.mode.startswith('I'):
            # Integer modes: assumed to carry 16-bit greyscale PNG data,
            # where white is 65535.
            white = 65535
            pixels = im
        else:
            # '1' and 'L' images report white as 255; any other mode is
            # reduced to 8-bit greyscale first.
            white = 255
            pixels = im if im.mode in ('1', 'L') else im.convert('L')
        tv = list(pixels.getdata())  # get pixel values

    # The former test, (x - 255) / 255.0 == 256.0, holds only for
    # x == 65535, so white was recognised in 16-bit images only and an
    # 8-bit picture came out as all ones. Comparing with the white level of
    # the actual mode gives the same result for 16-bit input and a correct
    # one for 8-bit input.
    return [0 if value == white else 1 for value in tv]

# Define the model (same as when creating the model file)
# result holds the flattened 0/1 pixel list of the picture to recognise.
result=imageprepare()
x = tf.placeholder(tf.float32, [None, 784])
# W and b are not used by the network built below.
# NOTE(review): presumably they are kept so that the automatically generated
# variable names follow the same creation order as in the training script,
# which the Saver would need to match the checkpoint - confirm before removing.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

def weight_variable(shape):
  """Return a tf.Variable of the given shape, initialised from a truncated normal (stddev 0.1)."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
  """Return a tf.Variable of the given shape with every element set to 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
  """Convolve x with kernel W using stride 1 in every dimension and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
  """Max-pool x over 2x2 windows that move 2 positions at a time, with 'SAME' padding."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# Rebuild the network with the same layer shapes as the training script so
# the saved parameters can be restored into it.

# First convolution layer: 5x5 kernels, 1 input channel, 32 kernels.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Flat 784-value rows become 28x28 single-channel images.
x_image = tf.reshape(x, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolution layer: 32 input channels, 64 kernels.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer with 1024 units on the flattened 7*7*64 features.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout keep probability is fed at run time (1.0 for prediction).
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer: 10 classes.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# Unlike the training script, softmax is applied here, so y_conv holds
# class probabilities rather than logits.
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

init_op = tf.global_variables_initializer()

"""
Load the checkpoint written by the training script (part one).
The checkpoint path is the one passed to saver.restore() below.
Use the model to predict the digit. The prediction comes back as a
one-element sequence and is read with index [0].

Based on the documentation at
https://www.tensorflow.org/versions/master/how_tos/variables/index.html
"""
saver = tf.train.Saver()
with tf.Session() as sess:
    # Initialise the variables, then overwrite them with the values stored
    # in the checkpoint.
    sess.run(init_op)
    # Uses the model parameters saved by the training script.
    saver.restore(sess,'C:/Users/Mobile Workstation01/PycharmProjects/number recognition')
    # Index of the most probable class for each input row.
    prediction=tf.argmax(y_conv,1)

    # Feed the single prepared picture as a batch of one; keep_prob 1.0
    # keeps all activations.
    predint=prediction.eval(feed_dict={x: [result],keep_prob: 1.0}, session=sess)
    #print(h_conv2)
    print('recognize result:')
    print(predint[0])