
A self-written convolution toolkit + VGG model pretraining + a self-written model

This is the last tutorial in Kevin's series, and it is packed with content.

An overview of the files:


This is everything the tutorial covers, and there is a lot of it. After writing the code you will have learned: how to process the training data and train in batches; the complete convolution workflow, which you can reuse for other convolutional networks; a hands-on VGG implementation; training with parameters transferred from the original VGG model; and opening TensorBoard to monitor training in real time.

1. The first file is the convolution toolkit, named tool.py; a screenshot of its contents is shown below.



It contains most of the methods a basic convolutional network needs, and you can call it again in later projects (a small usage sketch follows the listing below).

The code for this part is as follows:

# This is a convolution toolkit containing all the building blocks; calling it makes writing a model easy. The VGG16 below is written with it.
import tensorflow as tf
import numpy as np


#%%
def conv(layer_name, x, out_channels, kernel_size=[3,3], stride=[1,1,1,1], is_pretrain=True):
    '''Convolution op wrapper, use RELU activation after convolution
    Args:
        layer_name: e.g. conv1, pool1...
        x: input tensor, [batch_size, height, width, channels]
        out_channels: number of output channels (or convolutional kernels)
        kernel_size: the size of convolutional kernel, VGG paper used: [3,3]
        stride: A list of ints. 1-D of length 4. VGG paper used: [1, 1, 1, 1]
        is_pretrain: whether this layer's variables are trainable. Set it to False to
        freeze the layer when loading pretrained parameters; depending on the situation
        you can freeze only part of the conv layers. Frozen parameters do not change during training.
    Returns:
        4D tensor
    '''

    in_channels = x.get_shape()[-1]
    with tf.variable_scope(layer_name):
        w = tf.get_variable(name='weights',
                            trainable=is_pretrain,   # when False, this layer is frozen and will not be trained
                            shape=[kernel_size[0], kernel_size[1], in_channels, out_channels],
                            initializer=tf.contrib.layers.xavier_initializer())
        # default is uniform distribution initialization
        # note that this initialization differs from what we used before
        b = tf.get_variable(name='biases',
                            trainable=is_pretrain,
                            shape=[out_channels],
                            initializer=tf.constant_initializer(0.0))
        x = tf.nn.conv2d(x, w, stride, padding='SAME', name='conv')
        x = tf.nn.bias_add(x, b, name='bias_add')
        x = tf.nn.relu(x, name='relu')
        return x

#%% Pooling layer: the kernel is 2*2 with stride 2; max pooling is used throughout
def pool(layer_name, x, kernel=[1,2,2,1], stride=[1,2,2,1], is_max_pool=True):
    '''Pooling op
    Args:
        x: input tensor
        kernel: pooling kernel, VGG paper used [1,2,2,1], the size of kernel is 2X2
        stride: stride size, VGG paper used [1,2,2,1]
        padding:
        is_max_pool: boolean
                    if True: use max pooling
                    else: use avg pooling
    '''
    if is_max_pool:
        x = tf.nn.max_pool(x, kernel, strides=stride, padding='SAME', name=layer_name)
    else:
        x = tf.nn.avg_pool(x, kernel, strides=stride, padding='SAME', name=layer_name)
    return x

#%%  Batch normalization of the input; it works very well. Note how the two tf.nn functions are used.
def batch_norm(x):
    '''Batch normalization (I didn't include the offset and scale)
    '''
    epsilon = 1e-3
    batch_mean, batch_var = tf.nn.moments(x, [0])   # batch mean and variance
    x = tf.nn.batch_normalization(x,
                                  mean=batch_mean,
                                  variance=batch_var,
                                  offset=None,
                                  scale=None,
                                  variance_epsilon=epsilon)
    return x


# %% Fully connected layer; note that the input has to be reshaped (flattened) first
def FC_layer(layer_name, x, out_nodes):
    '''Wrapper for fully connected layers with RELU activation as default
    Args:
        layer_name: e.g. 'FC1', 'FC2'
        x: input feature map
        out_nodes: number of neurons for current FC layer
    '''
    shape = x.get_shape()
    # Two flattening cases: a 4-D tensor coming straight out of the conv layers, or the output of a previous FC layer
    if len(shape) == 4:
        size = shape[1].value * shape[2].value * shape[3].value
    else:
        size = shape[-1].value

    with tf.variable_scope(layer_name):
        w = tf.get_variable('weights',
                            shape=[size, out_nodes],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases',
                            shape=[out_nodes],
                            initializer=tf.constant_initializer(0.0))
        flat_x = tf.reshape(x, [-1, size])  # flatten the input to [batch_size, size]

        x = tf.nn.bias_add(tf.matmul(flat_x, w), b)
        x = tf.nn.relu(x)
        return x

# Compute the loss; the labels here are one-hot encoded (see input_data)
def loss(logits, labels):
    '''Compute loss
    Args:
        logits: logits tensor, [batch_size, n_classes]
        labels: one-hot labels
    '''
    with tf.name_scope('loss') as scope:
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels,name='cross-entropy')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope+'/loss', loss)
        return loss

# Accuracy
def accuracy(logits, labels):
  """Evaluate the quality of the logits at predicting the label.
  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, one-hot, [batch_size, NUM_CLASSES].
  """
  with tf.name_scope('accuracy') as scope:
      correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
      correct = tf.cast(correct, tf.float32)
      accuracy = tf.reduce_mean(correct)*100.0
      tf.summary.scalar(scope+'/accuracy', accuracy)
  return accuracy

# Optimizer; plain gradient descent by default
def optimize(loss, learning_rate, global_step):
    '''optimization, use Gradient Descent as default
    '''
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op


# The components below are needed for transfer learning

#  %% Load the parameters from the original VGG16 file, all of them
def load(data_path, session):
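    # note: on newer NumPy versions, np.load may also need allow_pickle=True to read this file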
    data_dict = np.load(data_path, encoding='latin1').item()

    keys = sorted(data_dict.keys())
    for key in keys:
        with tf.variable_scope(key, reuse=True):
            for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                session.run(tf.get_variable(subkey).assign(data))


# %%   Inspect the loaded parameters
def test_load():
    data_path = './/VGG-pretrain//vgg16.npy'  # path where the file is stored
    # note: you have to download this file from the internet yourself
    data_dict = np.load(data_path, encoding='latin1').item()
    keys = sorted(data_dict.keys())
    for key in keys:
        weights = data_dict[key][0]
        biases = data_dict[key][1]
        print('\n')
        print(key)
        print('weights shape: ', weights.shape)
        print('biases shape: ', biases.shape)


# %%   Load with skipping: not every layer gets its parameters loaded
def load_with_skip(data_path, session, skip_layer):
    data_dict = np.load(data_path, encoding='latin1').item()
    for key in data_dict:
        if key not in skip_layer:
            with tf.variable_scope(key, reuse=True):
                for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                    session.run(tf.get_variable(subkey).assign(data))

# Print the trainable parameters; run it to take a look.
def print_all_variables(train_only=True):
    """Print all trainable and non-trainable variables
    without tl.layers.initialize_global_variables(sess)

    Parameters
    ----------
    train_only : boolean
        If True, only print the trainable variables, otherwise, print all variables.
    """
    # tvar = tf.trainable_variables() if train_only else tf.all_variables()
    if train_only:
        t_vars = tf.trainable_variables()
        print("  [*] printing trainable variables")
    else:
        try: # TF1.0
            t_vars = tf.global_variables()
        except: # TF0.12
            t_vars = tf.all_variables()
        print("  [*] printing global variables")
    for idx, v in enumerate(t_vars):
        print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))

2. The second component is the VGG model itself, built with the tool.py above, which makes it very simple. For the exact architecture see the original paper and the many explanations online.

Two versions are given below; the main difference is how the graph appears in TensorBoard. The second one is recommended.

# VGG16 model
import tensorflow as tf
import tool


# %% The original VGG16 model. There are two versions; the second uses name scopes so the whole model shows up on TensorBoard. Pick either one.
# def VGG16(x, n_classes, is_pretrain=True):
#     x = tool.conv('conv1_1', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv1_2', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.pool('pool1', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)
#
#     x = tool.conv('conv2_1', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv2_2', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.pool('pool2', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)
#
#     x = tool.conv('conv3_1', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv3_2', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv3_3', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.pool('pool3', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)
#
#     x = tool.conv('conv4_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv4_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv4_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.pool('pool4', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)
#
#     x = tool.conv('conv5_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv5_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.conv('conv5_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
#     x = tool.pool('pool5', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)
#
#     x = tool.FC_layer('fc6', x, out_nodes=4096)
#     # After the FC layers there are two options: batch normalization, as below, or dropout; compare them if you have time
#     # x = tool.batch_norm(x)
#     x = tool.FC_layer('fc7', x, out_nodes=4096)
#     # x = tool.batch_norm(x)
#     x = tool.FC_layer('fc8', x, out_nodes=n_classes)
#
#     return x


def VGG16N(x, n_classes, is_pretrain=True):
    with tf.name_scope('VGG16'):
        x = tool.conv('conv1_1', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv1_2', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool1'):
            x = tool.pool('pool1', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tool.conv('conv2_1', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv2_2', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool2'):
            x = tool.pool('pool2', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tool.conv('conv3_1', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv3_2', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv3_3', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool3'):
            x = tool.pool('pool3', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tool.conv('conv4_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv4_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv4_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool4'):
            x = tool.pool('pool4', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tool.conv('conv5_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv5_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tool.conv('conv5_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool5'):
            x = tool.pool('pool5', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tool.FC_layer('fc6', x, out_nodes=4096)
        # with tf.name_scope('batch_norm1'):
        # x = tool.batch_norm(x)
        x = tool.FC_layer('fc7', x, out_nodes=4096)
        # with tf.name_scope('batch_norm2'):
        # x = tool.batch_norm(x)
        x = tool.FC_layer('fc8', x, out_nodes=n_classes)

        return x

3. The third file is the VGG training script. There is a lot to pay attention to here, so the code is presented in segments.

Import the required libraries:

import os
import os.path

import numpy as np
import tensorflow as tf
import input_data   # the input_data used earlier when training on CIFAR10
import VGG16_model  # your VGG model
import tool          # the self-written convolution network toolkit

The input_data here is from my CIFAR10 tutorial, and it is provided again below. Note that this training uses the CIFAR10 data, so it is best to finish the CIFAR10 tutorial before attempting this one.

input_data.py:

#########################################################
# Input data
# CIFAR10 has 60000 images in total: 50000 32*32 colour images for training and 10000 for testing.
# Download the binary data from the official site; this format is read in batches and is not
# loaded into memory all at once. URL: http://www.cs.toronto.edu/~kriz/cifar.html, Binary version.
# The storage layout is described at https://zhuanlan.zhihu.com/p/26141396
# Each record takes 3073 bytes: the first byte is the label, the remaining 32*32*3=3072 bytes are
# the image pixels; the code extracts the label and the image size based on this layout.
# The main processing steps are:
# 1. read the data
# 2. add the filenames to a queue with tf.train.string_input_producer()
# 3. read the queue with tf.FixedLengthRecordReader; it needs fixed-length records, which suits CIFAR10
# 4. decode the raw bytes (the original image is binary) into uint8
# 5. reshape the image to img_depth, img_height, img_width
##########################################################################

import tensorflow as tf
import numpy as np
import os


# %% Reading data

def read_cifar10(data_dir, is_train, batch_size, shuffle):
    """Read CIFAR10

    Args:
        data_dir: the directory of CIFAR10
        is_train: boolean
        batch_size:
        shuffle:       
    Returns:
        label: 1D tensor, tf.int32
        image: 4D tensor, [batch_size, height, width, 3], tf.float32

    """
    img_width = 32
    img_height = 32
    img_depth = 3
    label_bytes = 1
    image_bytes = img_width * img_height * img_depth

    with tf.name_scope('input'):

        if is_train:
            filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % ii)
                         for ii in np.arange(1, 6)]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]

        filename_queue = tf.train.string_input_producer(filenames)

        reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)

        key, value = reader.read(filename_queue)

        record_bytes = tf.decode_raw(value, tf.uint8)

        label = tf.slice(record_bytes, [0], [label_bytes])
        label = tf.cast(label, tf.int32)

        image_raw = tf.slice(record_bytes, [label_bytes], [image_bytes])
        image_raw = tf.reshape(image_raw, [img_depth, img_height, img_width])
        image = tf.transpose(image_raw, (1, 2, 0))  # convert from D/H/W to H/W/D
        image = tf.cast(image, tf.float32)

        #        # data augmentation

        #        image = tf.random_crop(image, [24, 24, 3])# randomly crop the image size to 24 x 24
        #        image = tf.image.random_flip_left_right(image)
        #        image = tf.image.random_brightness(image, max_delta=63)
        #        image = tf.image.random_contrast(image,lower=0.2,upper=1.8)



        image = tf.image.per_image_standardization(image)  # subtract the mean and divide by the standard deviation

        if shuffle:
            images, label_batch = tf.train.shuffle_batch(
                [image, label],
                batch_size=batch_size,
                num_threads=64,
                capacity=20000,
                min_after_dequeue=3000)
        else:
            images, label_batch = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=64,
                capacity=2000)
        ## ONE-HOT
        n_classes = 10
        label_batch = tf.one_hot(label_batch, depth=n_classes)
        label_batch = tf.cast(label_batch, dtype=tf.int32)
        label_batch = tf.reshape(label_batch, [batch_size, n_classes])

        return images, label_batch

# %%
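
To make the batching concrete, here is a small sanity check (my own sketch; the data_dir below is an assumption, point it at your CIFAR10 binaries). It pulls one batch from the pipeline and prints its shapes:

# pull a single batch from the CIFAR10 input pipeline and check the shapes
import tensorflow as tf
import input_data

images, labels = input_data.read_cifar10(data_dir='./data/',   # assumed location of the .bin files
                                          is_train=True,
                                          batch_size=32,
                                          shuffle=True)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img_batch, lbl_batch = sess.run([images, labels])
    print(img_batch.shape)   # expected: (32, 32, 32, 3)
    print(lbl_batch.shape)   # expected: (32, 10)
    coord.request_stop()
    coord.join(threads)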

Hyperparameter settings

# Hyperparameter settings for training this network on CIFAR10
IMG_W = 32
IMG_H = 32
N_CLASSES = 10
BATCH_SIZE = 32
learning_rate = 0.01
MAX_STEP = 15000   # it took me about one hour to complete the training.
IS_PRETRAIN = True

The original author's version includes both training and validation, but my validation ran into problems, so only the training part is here. This part needs the vgg16.npy file; search online and download it (if you have trouble downloading it, message me and I will send it to you). The file contains the pretrained network's parameters, used here for pre-training.
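
Once the file is in place, you can verify it loads with the test_load() helper from tool.py above (it assumes the file sits at .//VGG-pretrain//vgg16.npy; on newer NumPy versions np.load may also need allow_pickle=True):

# quick check of the downloaded vgg16.npy: print every layer's weight and bias shapes
import tool

tool.test_load()   # e.g. conv1_1 -> weights shape: (3, 3, 3, 64), biases shape: (64,)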

# %%   Training -- remember to adjust the paths and the function names involved
def train():
    pre_trained_weights = 'D:/Python/neural network/VGG-Kevin/VGG-pretrain/vgg16.npy'       # path to the downloaded vgg16.npy
    data_dir = 'D:/Python/neural network/CIFAR10-Guoqingxu/data/'   # directory with the CIFAR10 data to load
    train_log_dir = './/logs//train//'     # log directory written during training
    with tf.name_scope('input'):
        tra_image_batch, tra_label_batch = input_data.read_cifar10(data_dir=data_dir,
                                                                   is_train=True,
                                                                   batch_size=BATCH_SIZE,
                                                                   shuffle=True)

    logits = VGG16_model.VGG16N(tra_image_batch, N_CLASSES, IS_PRETRAIN)
    loss = tool.loss(logits,tra_label_batch)
    accuracy = tool.accuracy(logits, tra_label_batch)

    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = tool.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    # load the parameter file, assign the parameters, skip the specific layers
    # only the conv-layer parameters are loaded from the original file; the FC layers are randomly initialized
    tool.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    # print the model's variable (parameter) information
    tool.print_all_variables(train_only=True)
    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            tra_images, tra_labels = sess.run([tra_image_batch, tra_label_batch])
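            # note: the numpy batch fetched above is not fed back into the graph;
            # the sess.run below pulls a fresh batch directly through the input queue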
            _, tra_loss, tra_acc = sess.run([train_op, loss, accuracy])
            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('Step: %d, loss: %.4f, accuracy: %.4f%%' % (step, tra_loss, tra_acc))
                summary_str = sess.run(summary_op)
                tra_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()

That is the end of the code. When using someone else's code, always pay attention to the paths that need changing; I have marked them in the comments.
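
To actually start training, a minimal sketch (assuming the training code above is saved as its own script):

# append this at the bottom of the training script, then run the file
if __name__ == '__main__':
    train()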

Of these three files you only need to run the last one: add train() at the end, as in the sketch above, and run it. As soon as it starts running, an events file appears under the logs directory, which you can open with TensorBoard to watch how training is going. A demonstration follows:


1. Open a terminal (shortcut Win + R).

2. Change to the directory where your project lives; here the project is VGG-Kevin.

3. Type tensorboard --logdir=.//logs//


4. Copy the URL into the Chrome browser.

Then I found that nothing showed up when it opened. I have never fully figured TensorBoard out; sometimes there is data when it opens and sometimes there is not, so you have to try different ways of launching it.

5. A day later I managed to open it using a different approach.


Here train is the directory one level up.

Results:






Only about 8000 steps were trained, so convergence is not great. You can experiment with several fine-tuning approaches (one variant is sketched below).
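
One variant worth trying (my own sketch, not from the original tutorial): freeze all conv layers by building the model with is_pretrain=False, so after loading the pretrained conv weights only the randomly initialized FC layers are updated.

# sketch of a fine-tuning variant: conv layers frozen, only the FC layers train
import tensorflow as tf
import VGG16_model
import tool

images = tf.placeholder(tf.float32, [None, 32, 32, 3])
logits = VGG16_model.VGG16N(images, 10, is_pretrain=False)   # every conv layer gets trainable=False
tool.print_all_variables(train_only=True)                    # should list only the fc6/fc7/fc8 variables
# in train(), tool.load_with_skip(pre_trained_weights, sess, ['fc6', 'fc7', 'fc8'])
# would then still load the pretrained conv weights exactly as before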