
TensorFlow Learning Notes 10 -- The VGG Network

1 VGG Network Summary

VGG essentially builds on AlexNet and studies how deepening the network improves performance. The overall layout is still five convolutional stages followed by three fully connected layers, but the five stages are separated by pooling layers, each stage stacks several convolutional layers back to back, and smaller kernels in larger numbers are used to boost performance. For example, AlexNet uses kernels as large as 11×11 (96 of them in its first layer), whereas VGG uses 3×3 kernels throughout but with far more channels (256, 512 and so on). In other words, by shrinking the filter size and adding more layers, the same effective receptive field is covered while performance improves.
The VGG model is much larger than AlexNet: the trained parameters take roughly 500 MB and training takes a long time. Fortunately, pre-trained weights such as VGG-16 and VGG-19 are available; both perform well and can be downloaded from the web.
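To make the "smaller kernels, more layers" trade-off concrete, here is a small illustrative calculation (my own sketch, not from the original post): three stacked 3×3 convolutions see the same 7×7 region as a single 7×7 convolution, but use far fewer parameters.

# Illustrative only: parameter counts for convolutions with C input and
# C output channels (biases ignored).
def conv_params(kernel_size, in_channels, out_channels):
    return kernel_size * kernel_size * in_channels * out_channels

C = 256
single_7x7 = conv_params(7, C, C)        # one 7x7 layer: 49 * C^2 weights
stacked_3x3 = 3 * conv_params(3, C, C)   # three 3x3 layers, same 7x7 receptive field: 27 * C^2

print("one 7x7 conv:      %d parameters" % single_7x7)
print("three 3x3 convs:   %d parameters" % stacked_3x3)

On top of the parameter savings, the stacked version inserts extra non-linearities (a ReLU after each 3×3 layer), which is part of why the deeper, small-kernel design works better.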

2 VGG Network Model

The paper explores several configurations, labeled A to E; reading each column of the table from top to bottom, every configuration is five convolutional stages plus three fully connected layers, growing deeper from A to E.
[Figure: the VGG configuration table, types A to E]
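As a reading aid, the number of 3×3 convolution layers per stage for the main configurations can be summarized as below. This is my own summary of the paper's table (configuration C, which mixes in 1×1 convolutions, is omitted), so double-check the exact counts against the original if you rely on them.

# Convolution layers per stage (stage 1..5); every configuration ends with
# the same three fully connected layers (4096, 4096, 1000).
VGG_CONFIGS = {
    "A (VGG-11)": [1, 1, 2, 2, 2],
    "B (VGG-13)": [2, 2, 2, 2, 2],
    "D (VGG-16)": [2, 2, 3, 3, 3],
    "E (VGG-19)": [2, 2, 4, 4, 4],
}

for name, convs_per_stage in VGG_CONFIGS.items():
    depth = sum(convs_per_stage) + 3  # conv layers + 3 fully connected layers
    print(name, convs_per_stage, "-> total weight layers:", depth)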

3 Implementing the VGG Network

Here you can either train your own VGG model or load a pre-trained one to classify images. The code for the VGG-19 model is shown below; it is nicely written, with everything encapsulated in a single class.


import numpy as np
import tensorflow as tf

# Per-channel mean pixel values in B, G, R order, subtracted from the input.
_VGG_MEAN = [103.939, 116.779, 123.68]


class Vgg19:
    """
    A VGG-19 Network implementation using TensorFlow library.
    The network takes an image of size 224x224 with RGB channels and returns
    category scores of size 1000.
    The network configuration:
    - RGB: 224x224x3
    - BGR: 224x224x3
    - conv1: 224x224x64
    - conv2: 112x112x128
    - conv3: 56x56x256
    - conv4: 28x28x512
    - conv5: 14x14x512
    - fc6: 25088(=7x7x512)x4096
    - fc7: 4096x4096
    - fc8: 4096x1000
    """
WIDTH = 224 "The fixed width of the input image." HEIGHT = 224 "The fixed height of the input image." CHANNELS = 3 "The fixed channels number of the input image." model = {} "The model storing the kernels, weights and biases." model_save_path = None "The model save path, especially for the training process."
model_save_freq = 0 """ The frequency to save the model in the training process. e.g. Save the model every 1000 iteration. """ learning_rate = 0.05 "Learning rate for the gradient descent." _inputRGB = None _inputBGR = None _inputNormalizedBGR = None _conv1_1 = None _conv1_2 = None _pool1 = None _conv2_1 = None _conv2_2 = None _pool2 = None _conv3_1 = None _conv3_2 = None _conv3_3 = None _conv3_4 = None _pool3 = None _conv4_1 = None _conv4_2 = None _conv4_3 = None _conv4_4 = None _pool4 = None _conv5_1 = None _conv5_2 = None _conv5_3 = None _conv5_4 = None _pool5 = None _fc6 = None _relu6 = None _fc7 = None _relu7 = None _fc8 = None _preds = None "The predictions tensor, shape of [?, 1000]" _loss = None _optimizer = None _train_labels = None "The train labels tensor, a placeholder." def __init__(self, model=None, model_save_path=None, model_save_freq=0): """ :param model: The model either for back-propagation or :param model_save_path: The model path for training process. :param model_save_freq: Save the model (in training process) every N iterations. forward-propagation. """ self.model = self._init_empty_model() if not model else model self.model_save_path = model_save_path self.model_save_freq = model_save_freq # Define the train labels. self._train_labels = tf.placeholder(tf.float32, [None, 1000]) # Define the input placeholder with RGB channels. # Size: 224x224x3 self._inputRGB = tf.placeholder(tf.float32, [None, Vgg19.WIDTH, Vgg19.HEIGHT, Vgg19.CHANNELS]) # Convert RGB to BGR order # Size: 224x224x3 red, green, blue = tf.split(3, 3, self._inputRGB) self._inputBGR = tf.concat(3, [ blue, green, red, ]) # normalize the input so that the elements all have nearly equal # variances. # Size: 224x224x3 self._inputNormalizedBGR = tf.concat(3, [ blue - _VGG_MEAN[0], green - _VGG_MEAN[1], red - _VGG_MEAN[2], ]) # Setup the VGG-Net graph. # Size: 224x224x64 self._conv1_1 = self._conv_layer(self._inputNormalizedBGR, "conv1_1") self._conv1_2 = self._conv_layer(self._conv1_1, "conv1_2") # Size: 112x112x64 self._pool1 = self._max_pool(self._conv1_2, 'pool1') # Size: 112x112x128 self._conv2_1 = self._conv_layer(self._pool1, "conv2_1") self._conv2_2 = self._conv_layer(self._conv2_1, "conv2_2") # Size: 56x56x128 self._pool2 = self._max_pool(self._conv2_2, 'pool2') # Size: 56x56x256 self._conv3_1 = self._conv_layer(self._pool2, "conv3_1") self._conv3_2 = self._conv_layer(self._conv3_1, "conv3_2") self._conv3_3 = self._conv_layer(self._conv3_2, "conv3_3") self._conv3_4 = self._conv_layer(self._conv3_3, "conv3_4") # Size: 28x28x256 self._pool3 = self._max_pool(self._conv3_4, 'pool3') # Size: 28x28x512 self._conv4_1 = self._conv_layer(self._pool3, "conv4_1") self._conv4_2 = self._conv_layer(self._conv4_1, "conv4_2") self._conv4_3 = self._conv_layer(self._conv4_2, "conv4_3") self._conv4_4 = self._conv_layer(self._conv4_3, "conv4_4") # Size: 14x14x512 self._pool4 = self._max_pool(self._conv4_4, 'pool4') # Size: 14x14x512 self._conv5_1 = self._conv_layer(self._pool4, "conv5_1") self._conv5_2 = self._conv_layer(self._conv5_1, "conv5_2") self._conv5_3 = self._conv_layer(self._conv5_2, "conv5_3") self._conv5_4 = self._conv_layer(self._conv5_3, "conv5_4") # Size: 7x7x512 self._pool5 = self._max_pool(self._conv5_4, 'pool5') # Size: 25088(=7x7x512)x4096 self._fc6 = self._fc_layer(self._pool5, "fc6") self._relu6 = tf.nn.relu(self._fc6) # Size: 4096x4096 self._fc7 = self._fc_layer(self._relu6, "fc7") self._relu7 = tf.nn.relu(self._fc7) # Size: 4096x1000 self._fc8 = self._fc_layer(self._relu7, "fc8") # For predicting. 
self._preds = tf.nn.softmax(self._fc8, name="prediction") # For training. self._loss = tf.nn.softmax_cross_entropy_with_logits(self._fc8, self._train_labels) self._optimizer = tf.train \ .GradientDescentOptimizer(self.learning_rate) \ .minimize(self._loss) @property def inputRGB(self): """ The input RGB images tensor of channels in RGB order. Shape must be of [?, 224, 224, 3] """ return self._inputRGB @property def inputBGR(self): """ The input RGB images tensor of channels in BGR order. Shape must be of [?, 224, 224, 3] """ return self._inputBGR @property def preds(self): """ The prediction(s) tensor, shape of [?, 1000]. """ return self._preds @property def train_labels(self): """ The train labels tensor, shape of [?, 1000]. """ return self._train_labels @property def loss(self): """ The loss tensor. """ return self._loss @property def optimizer(self): """ The optimizer tensor, used for the training. """ return self._optimizer def _avg_pool(self, value, name): return tf.nn.avg_pool(value, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) def _max_pool(self, value, name): return tf.nn.max_pool(value, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) def _conv_layer(self, value, name): with tf.variable_scope(name): filt = self._get_conv_filter(name) conv = tf.nn.conv2d(value, filt, [1, 1, 1, 1], padding='SAME') conv_biases = self._get_bias(name) bias = tf.nn.bias_add(conv, conv_biases) relu = tf.nn.relu(bias) return relu def _fc_layer(self, value, name): with tf.variable_scope(name): shape = value.get_shape().as_list() dim = 1 for d in shape[1:]: dim *= d x = tf.reshape(value, [-1, dim]) weights = self._get_fc_weight(name) biases = self._get_bias(name) # Fully connected layer. Note that the '+' operation automatically # broadcasts the biases. fc = tf.nn.bias_add(tf.matmul(x, weights), biases) return fc def _get_conv_filter(self, name): return tf.Variable(self.model[name][0], name="filter") def _get_bias(self, name): return tf.Variable(self.model[name][1], name="biases") def _get_fc_weight(self, name): return tf.Variable(self.model[name][0], name="weights") def _init_empty_model(self): self.model = { # All the following things follows [0] = weights, [1] = biases. # Conv-layer 1. "conv1_1": [np.ndarray([3, 3, 3, 64]), np.ndarray([64])], "conv1_2": [np.ndarray([3, 3, 64, 64]), np.ndarray([64])], # Conv-layer 2. "conv2_1": [np.ndarray([3, 3, 64, 128]), np.ndarray([128])], "conv2_2": [np.ndarray([3, 3, 128, 128]), np.ndarray([128])], # Conv-layer 3. "conv3_1": [np.ndarray([3, 3, 128, 256]), np.ndarray([256])], "conv3_2": [np.ndarray([3, 3, 256, 256]), np.ndarray([256])], "conv3_3": [np.ndarray([3, 3, 256, 256]), np.ndarray([256])], "conv3_4": [np.ndarray([3, 3, 256, 256]), np.ndarray([256])], # Conv-layer 4. "conv4_1": [np.ndarray([3, 3, 256, 512]), np.ndarray([512])], "conv4_2": [np.ndarray([3, 3, 512, 512]), np.ndarray([512])], "conv4_3": [np.ndarray([3, 3, 512, 512]), np.ndarray([512])], "conv4_4": [np.ndarray([3, 3, 512, 512]), np.ndarray([512])], # Conv-layer 5. "conv5_1": [np.ndarray([3, 3, 512, 512]), np.ndarray([512])], "conv5_2": [np.ndarray([3, 3, 512, 512]), np.ndarray([512])], "conv5_3": [np.ndarray([3, 3, 512, 512]), np.ndarray([512])], "conv5_4": [np.ndarray([3, 3, 512, 512]), np.ndarray([512])], # FC layer. "fc6": [np.ndarray([25088, 4096]), np.ndarray([4096])], "fc7": [np.ndarray([4096, 4096]), np.ndarray([4096])], "fc8": [np.ndarray([4096, 1000]), np.ndarray([1000])]} return self.model
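A minimal usage sketch (my own addition, not from the original post): it assumes a pre-trained weights file named vgg19.npy whose contents already match the {name: [weights, biases]} dictionary expected by the class, and it uses the same pre-1.0 TensorFlow API as the code above.

# Hypothetical usage: the file name, weight format and input batch are assumptions.
import numpy as np
import tensorflow as tf

model_data = np.load("vgg19.npy", encoding='latin1').item()  # {name: [weights, biases]}
vgg = Vgg19(model=model_data)

batch = np.zeros([1, 224, 224, 3], dtype=np.float32)  # replace with a real RGB image

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())  # pre-1.0 API, matching the class above
    probs = sess.run(vgg.preds, feed_dict={vgg.inputRGB: batch})
    print("Top-1 class index:", probs[0].argmax())

For training instead of inference, you would feed vgg.train_labels as well and run vgg.optimizer and vgg.loss in the same way.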

4 Other Notes

This blog post explains quite clearly why 3×3 kernels are used, and also explains how parameters trained with a shallower model can be used to initialize the first few layers of a deeper one.
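A rough sketch of that shallow-to-deep initialization idea (my own illustration, assuming both models store their weights in the same {name: [weights, biases]} dictionary used by the Vgg19 class above; the file names and the exact layer list are hypothetical, not the precise set used in the paper):

import numpy as np

shallow = np.load("vgg11.npy", encoding='latin1').item()             # trained shallow model
deep = np.load("vgg19_random_init.npy", encoding='latin1').item()    # randomly initialized deep model

# Copy the layers shared by both models (e.g. the first conv layer of each
# stage and the three FC layers); the remaining layers keep their random init.
for name in ("conv1_1", "conv2_1", "conv3_1", "conv4_1", "fc6", "fc7", "fc8"):
    if name in shallow and name in deep:
        deep[name] = shallow[name]

vgg19 = Vgg19(model=deep)  # continue training the deeper net from this warm start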