
Deep Learning Primer: Training a Convolutional Neural Network on the CIFAR-10 Dataset

Introduction to the CIFAR-10 Dataset

CIFAR-10 is a small dataset for general object recognition compiled by Alex Krizhevsky and Ilya Sutskever, students of Hinton. It contains RGB color images in 10 classes: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck.

The dataset contains 50,000 training images and 10,000 test images. Differences from the MNIST handwritten-digit dataset:

CIFAR-10                        MNIST
3-channel RGB color images      Grayscale images
32x32 pixels                    28x28 pixels
Varied scales and features      Relatively distinct features

As a result, linear models perform poorly on CIFAR-10.
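To make the point concrete, here is a minimal sketch (my own illustration, not from the tutorial) of such a linear model: a single softmax-regression layer over flattened 32x32x3 pixel vectors. One weight vector per class cannot cope with the varied scales, poses, and backgrounds in CIFAR-10, which is why the convolutional model below is needed.

import tensorflow as tf

# Linear (softmax regression) baseline: flatten each image to 3072 values
# and score the 10 classes with a single affine map.
images = tf.placeholder(tf.float32, [None, 32 * 32 * 3])
labels = tf.placeholder(tf.int64, [None])
W = tf.Variable(tf.zeros([32 * 32 * 3, 10]))
b = tf.Variable(tf.zeros([10]))
logits = tf.matmul(images, W) + b  # no feature extraction at all
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)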

 

Downloading the Dataset and Code

Dataset download

Official site: https://www.cs.toronto.edu/~kriz/cifar.html

Code download

https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10

File                        Purpose
cifar10_input.py            Reads the CIFAR-10 training images into TensorFlow
cifar10_input_test.py       Tests for cifar10_input.py
cifar10.py                  Builds the prediction model
cifar10_train.py            Trains the model on a single CPU or GPU
cifar10_eval.py             Evaluates the model's performance on the test set
cifar10_multi_gpu_train.py  Trains the model on multiple GPUs

You can also download everything from my Baidu Netdisk:

Link: https://pan.baidu.com/s/1GyiKrMeMpXALOxuQRn_zsg  Password: 1a5y

 

Extracting Images from the Dataset

Note: the downloaded archive must be unpacked first before the images can be extracted from the .bin files.
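For example, the archive can be unpacked with Python's standard tarfile module (a small sketch; the archive name and target folder below are assumptions based on the official download):

import tarfile

# Assumed paths: adjust to wherever cifar-10-binary.tar.gz was saved.
with tarfile.open('cifar10_data/cifar-10-binary.tar.gz', 'r:gz') as tar:
    tar.extractall('cifar10_data')  # produces cifar-10-batches-bin/data_batch_*.bin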

# coding: utf-8
# Import cifar10_input from the current directory; this module reads the CIFAR-10 data
import cifar10_input
# Import TensorFlow and other modules we may need
import tensorflow as tf
import os
import scipy.misc


def inputs_origin():
  # There are 5 filenames, data_batch_1.bin through data_batch_5.bin
  # All of them are training images
  # Change this to the path of your own copy of the dataset
  filenames = [r'E:\Program\Python\Deep-Learning-21-Examples-master\chapter_2\cifar10_data\cifar-10-batches-bin\data_batch_%d.bin'
               % i for i in range(1, 6)]
  # Check that the files exist
  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)
  # Wrap the list of filenames as a TensorFlow queue
  filename_queue = tf.train.string_input_producer(filenames)
  # cifar10_input.read_cifar10 is a pre-written function that reads files from the queue
  # The uint8image attribute of the returned read_input is the image Tensor
  read_input = cifar10_input.read_cifar10(filename_queue)
  # Convert the image to floating point
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)
  # The returned reshaped_image is the tensor of a single image
  # Think of it this way: each sess.run(reshaped_image) fetches one image
  return reshaped_image


if __name__ == '__main__':
  # Create a session
  with tf.Session() as sess:
    # Call inputs_origin. cifar10_data/cifar-10-batches-bin is the folder of the downloaded data
    reshaped_image = inputs_origin()
    # The start_queue_runners step is essential.
    # We created filename_queue = tf.train.string_input_producer(filenames) above;
    # that queue only starts running once start_queue_runners is called.
    # Without start_queue_runners the program cannot proceed.
    threads = tf.train.start_queue_runners(sess=sess)
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    # Create the folder cifar10_data/raw/
    if not os.path.exists('cifar10_data/raw/'):
      os.makedirs('cifar10_data/raw/')
    # Save 30 images
    for i in range(30):
      # Each sess.run(reshaped_image) fetches one image
      image_array = sess.run(reshaped_image)
      # Save the image
      scipy.misc.toimage(image_array).save('cifar10_data/raw/%d.jpg' % i)
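One caveat: scipy.misc.toimage was deprecated and has since been removed from recent SciPy releases. If your SciPy no longer provides it, the save step can be done with Pillow instead (a drop-in sketch for the last line above):

from PIL import Image
import numpy as np

# Pixel values here are 0-255 floats (cast from uint8, never rescaled),
# so converting back to uint8 is lossless.
Image.fromarray(image_array.astype(np.uint8)).save('cifar10_data/raw/%d.jpg' % i)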

Results

Annotated Convolutional Neural Network Model

Reference: https://blog.csdn.net/akadiao/article/details/79618342 (this post annotates all of the files and is very thorough)

# Build the model

def inference(images):
    # First convolutional layer
    with tf.variable_scope('conv1') as scope:
        # Convolution kernel
        kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=None)
        # Convolution
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        # Biases initialized to 0
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        # ReLU activation
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        # Summary
        _activation_summary(conv1)
    # First pooling layer
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
    # LRN (local response normalization) layer
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
    # Second convolutional layer
    with tf.variable_scope('conv2') as scope:
        # Convolution kernel
        kernel = _variable_with_weight_decay('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=None)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        # Biases initialized to 0.1
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        # ReLU activation
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        # Summary
        _activation_summary(conv2)
    # Second LRN layer
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
    # Second pooling layer
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    # Fully connected layer
    with tf.variable_scope('local3') as scope:
        # Flatten into a 1-D vector
        reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
        # Flattened dimension
        dim = reshape.get_shape()[1].value
        # Weight decay (wd=0.004) helps prevent overfitting
        weights = _variable_with_weight_decay('weights', shape=[dim, 384], stddev=0.04, wd=0.004)
        # Biases initialized to 0.1
        biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        # ReLU activation
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
        _activation_summary(local3)

    # Fully connected layer
    with tf.variable_scope('local4') as scope:
        # Weight decay (wd=0.004) helps prevent overfitting
        weights = _variable_with_weight_decay('weights', shape=[384, 192], stddev=0.04, wd=0.004)
        # Biases initialized to 0.1
        biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        # ReLU activation
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
        _activation_summary(local4)

    # Linear layer
    # (WX + b) is computed without softmax, because tf.nn.sparse_softmax_cross_entropy_with_logits
    # accepts unscaled logits and performs the softmax internally for efficiency
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES], stddev=1/192.0, wd=None)
        # Biases initialized to 0
        biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        # Linear transform (WX + b) producing the logits
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
        # Summary
        _activation_summary(softmax_linear)
    return softmax_linear
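A note on the flattened size dim: in the official tutorial, cifar10_input crops the input images to 24x24, and each SAME max-pool with stride 2 halves the spatial size (24 -> 12 -> 6), so pool2 has shape [batch_size, 6, 6, 64] and dim = 6 * 6 * 64 = 2304. With uncropped 32x32 inputs it would instead be 8 * 8 * 64 = 4096; reading dim from reshape.get_shape() keeps the code correct in either case.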



# Model training

# Loss
def loss(logits, labels):
    labels = tf.cast(labels, tf.int64)
    # Compute the cross entropy between logits and labels
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name='cross_entropy_per_example')
    # Average cross-entropy loss over the whole batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    # Add the variable to a collection
    tf.add_to_collection('losses', cross_entropy_mean)
    # The total loss is the cross-entropy loss plus all the weight decay terms (L2 losses)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
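# Aside (a hedged sketch, not part of cifar10.py; assumes `import tensorflow as tf`):
# a quick check that sparse_softmax_cross_entropy_with_logits really applies softmax
# internally, i.e. that it matches -log(softmax(logits)[label]) computed by hand.
def _check_sparse_xent():
    logits = tf.constant([[2.0, 1.0, 0.1]])
    labels = tf.constant([0])
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    manual = -tf.log(tf.nn.softmax(logits)[0, 0])
    with tf.Session() as sess:
        print(sess.run([xent, manual]))  # both evaluate to ~0.417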

# Loss summaries
def _add_loss_summaries(total_loss):
    # Exponential moving average
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    # Apply the exponential moving average to the individual losses
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    # Scalar summaries for the individual losses and the total loss
    for l in losses + [total_loss]:
        # Name each raw loss '(raw)' and give the moving average of the loss
        # the original loss name
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))
    return loss_averages_op
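# Aside (a hedged sketch, not part of cifar10.py; assumes `import tensorflow as tf`):
# how ExponentialMovingAverage smooths a value — after each update,
# shadow = 0.9 * shadow + 0.1 * value, which is what the summaries above record.
def _check_ema():
    v = tf.Variable(0.0)
    ema = tf.train.ExponentialMovingAverage(0.9)
    maintain_op = ema.apply([v])  # the shadow variable starts at v's initial value
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.assign(v, 10.0))
        sess.run(maintain_op)
        print(sess.run(ema.average(v)))  # 0.9 * 0.0 + 0.1 * 10.0 = 1.0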

# Train the CIFAR-10 model

def train(total_loss, global_step):
    # Variables that affect the learning rate
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
    # Exponential learning-rate decay
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR, staircase=True)
    tf.summary.scalar('learning_rate', lr)
    # Moving averages of the losses
    loss_averages_op = _add_loss_summaries(total_loss)
    # Compute gradients
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
    # Apply the gradients
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    # Histogram summaries for the trainable variables and their gradients
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    # Track exponential moving averages of all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
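For reference, cifar10_train.py ties these pieces together roughly as follows (a simplified sketch of the tutorial's training loop; the real script also logs the loss and step time and reads its settings from command-line flags):

import tensorflow as tf
import cifar10  # the tutorial module containing inference/loss/train above

def train_loop():
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()
        # distorted_inputs() yields augmented training batches read from the .bin files
        images, labels = cifar10.distorted_inputs()
        logits = cifar10.inference(images)
        total_loss = cifar10.loss(logits, labels)
        train_op = cifar10.train(total_loss, global_step)
        # MonitoredTrainingSession handles initialization, checkpoints, and summaries
        with tf.train.MonitoredTrainingSession(checkpoint_dir='cifar10_train') as sess:
            while not sess.should_stop():
                sess.run(train_op)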

The training results are saved in the cifar10_train folder; the summaries written there can be visualized with TensorBoard.