TensorFlow從入門到放棄(二)——基於InceptionV3的遷移學習以及影象特徵的提取
阿新 • • 發佈:2019-01-06
1. flower資料集
共五種花的圖片
2. 圖片處理
將圖片劃分為train、val、test三個子集並提取圖片特徵。這個過程有點兒漫長請耐心等待。。。。。。
import glob
import os.path
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

# Root folder of the flower dataset (one sub-folder per class).
INPUT_DATA = r'E:\PythonSpace\finetune_NET\flower_photos'
# Output .npy file holding the split dataset.
OUT_FILE = r'E:\PythonSpace\finetune_NET\flower_processed_data.npy'
# Percentage of images assigned to the validation / test splits.
VALIDATION_PERCENTAGE = 10
TEST_PERCENTAGE = 10


def create_image_lists(sess, testing_percentage, validation_percentage):
    """Read every *.jpg under INPUT_DATA, decode and resize it, and randomly
    split the images into training / validation / testing sets.

    Args:
        sess: an open tf.Session used to run the decode pipeline.
        testing_percentage: int, percentage of images for the test split.
        validation_percentage: int, percentage for the validation split.

    Returns:
        np.ndarray (object dtype) of [training_images, training_labels,
        validation_images, validation_labels, testing_images, testing_labels].
    """
    # os.walk yields the root directory first, then one entry per class folder.
    sub_dirs = [x[0] for x in os.walk(INPUT_DATA)]
    is_root_dir = True

    training_images = []
    training_labels = []
    testing_images = []
    testing_labels = []
    validation_images = []
    validation_labels = []
    current_label = 0
    current_image = 0

    # BUG FIX: build the decode/resize graph ONCE.  The original created new
    # decode_jpeg / convert_image_dtype / resize_images ops inside the
    # per-image loop, so the graph grew with every image — leaking memory
    # and making each iteration slower than the last.
    raw_jpeg_ph = tf.placeholder(tf.string)
    decoded = tf.image.decode_jpeg(raw_jpeg_ph)
    # decode_jpeg yields uint8; convert to float32 in [0, 1].
    decoded = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    # NOTE(review): 229x229 kept for consistency with the rest of this
    # tutorial; the canonical InceptionV3 input size is 299x299.
    resized = tf.image.resize_images(decoded, [229, 229])

    for sub_dir in sub_dirs:
        # Skip the dataset root itself; it contains no images.
        if is_root_dir:
            is_root_dir = False
            continue

        extension = 'jpg'
        file_list = []
        # Class folder name, e.g. 'roses'.
        dir_name = os.path.basename(sub_dir)
        file_glob = os.path.join(INPUT_DATA, dir_name, '*.' + extension)
        file_list.extend(glob.glob(file_glob))

        for file_name in file_list:
            current_image += 1
            print(current_image)
            # Read raw JPEG bytes and push them through the shared pipeline.
            image_raw_data = gfile.FastGFile(file_name, 'rb').read()
            image_value = sess.run(resized,
                                   feed_dict={raw_jpeg_ph: image_raw_data})

            # Randomly assign this image to one of the three splits.
            chance = np.random.randint(100)
            if chance < validation_percentage:
                validation_images.append(image_value)
                validation_labels.append(current_label)
            elif chance < (validation_percentage + testing_percentage):
                testing_images.append(image_value)
                testing_labels.append(current_label)
            else:
                training_images.append(image_value)
                training_labels.append(current_label)
        current_label += 1

    # Shuffle images and labels with the same RNG state so pairs stay aligned.
    state = np.random.get_state()
    np.random.shuffle(training_images)
    np.random.set_state(state)
    np.random.shuffle(training_labels)

    return np.asarray([training_images, training_labels,
                       validation_images, validation_labels,
                       testing_images, testing_labels])


def main():
    """Run the split/decode pipeline and save the result to OUT_FILE."""
    with tf.Session() as sess:
        processed_data = create_image_lists(
            sess, TEST_PERCENTAGE, VALIDATION_PERCENTAGE)
        np.save(OUT_FILE, processed_data)


if __name__ == '__main__':
    main()
3. 下載預訓練好的inception-v3網路模型權重檔案
4. finetune_NET.py
import glob
import os.path
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile
import tensorflow.contrib.slim as slim
# Load the slim InceptionV3 model definition.
import tensorflow.contrib.slim.python.slim.nets.inception_v3 as inception_v3

# Preprocessed dataset produced by the splitting script.
INPUT_DATA = r'E:\PythonSpace\finetune_NET\flower_processed_data.npy'
# Directory where fine-tuned checkpoints are written.
TRAIN_FILE = r'E:\PythonSpace\finetune_NET\model'
# Pre-trained InceptionV3 checkpoint to restore from.
CKPT_FILE = r'E:\PythonSpace\finetune_NET\inception_v3.ckpt'

# Training hyper-parameters.
LEARNING_RATE = 0.0001
# Number of training iterations.
STEPS = 300
# Images per forward/backward pass.
BATCH = 30
# Five flower categories.
N_CLASSES = 5

# Only the final classification layers are re-initialised and trained;
# everything else keeps its pre-trained weights.
CHECKPOINT_EXCLUDE_SCOPES = 'InceptionV3/Logits, InceptionV3/AuxLogits'
TRAINABLE_SCOPES = 'InceptionV3/Logits, InceptionV3/AuxLogits'


def get_tuned_variables():
    """Return the model variables to restore from CKPT_FILE: every variable
    whose scope is NOT listed in CHECKPOINT_EXCLUDE_SCOPES."""
    exclusions = [scope.strip()
                  for scope in CHECKPOINT_EXCLUDE_SCOPES.split(',')]
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)
    return variables_to_restore


def get_trainable_variables():
    """Return the trainable variables inside TRAINABLE_SCOPES
    (the two logits layers)."""
    scopes = [scope.strip() for scope in TRAINABLE_SCOPES.split(',')]
    variables_to_train = []
    for scope in scopes:
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        variables_to_train.extend(variables)
    return variables_to_train


def main(argv=None):
    """Fine-tune the InceptionV3 logits layers on the flower dataset."""
    # Load the preprocessed dataset.
    processed_data = np.load(INPUT_DATA)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    # BUG FIX: the original string pair lacked a separating space and
    # printed "...and %dtesting examples.".
    print("%d training examples, %d validation examples and %d "
          "testing examples." % (n_training_example,
                                 len(validation_labels),
                                 len(testing_labels)))

    # Network inputs.
    images = tf.placeholder(tf.float32, [None, 229, 229, 3],
                            name="input_images")
    labels = tf.placeholder(tf.int64, [None], name="labels")

    # Forward pass through InceptionV3.
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits, _ = inception_v3.inception_v3(images, num_classes=N_CLASSES)

    # Variables we actually want to fine-tune.
    trainable_variables = get_trainable_variables()

    # Cross-entropy loss against one-hot labels.
    tf.losses.softmax_cross_entropy(
        tf.one_hot(labels, N_CLASSES), logits, weights=1.0)
    # BUG FIX: the original called minimize() without var_list, which updated
    # EVERY variable in the network instead of only the two logits layers
    # this script claims to fine-tune.
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss(), var_list=trainable_variables)

    # Accuracy: fraction of argmax predictions matching the labels.
    with tf.name_scope("evaluation"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    # Function that restores the pre-trained weights (all layers except the
    # excluded logits scopes).
    load_fn = slim.assign_from_checkpoint_fn(
        CKPT_FILE, get_tuned_variables(), ignore_missing_vars=True)

    # Saver used to write the fine-tuned weights.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise every variable first, then overwrite the restorable
        # ones from the checkpoint.
        init = tf.global_variables_initializer()
        sess.run(init)
        print("loading tuned variables from %s" % CKPT_FILE)
        load_fn(sess)

        start = 0
        end = BATCH
        for i in range(STEPS):
            # One training step on the current batch window.
            sess.run(train_step, feed_dict={
                images: training_images[start:end],
                labels: training_labels[start:end]
            })
            if i % 30 == 0 or i + 1 == STEPS:
                # Checkpoint file names must carry the .ckpt suffix.
                model_path = os.path.join(
                    TRAIN_FILE, 'model_step' + str(i + 1) + '.ckpt')
                saver.save(sess, model_path)
                validation_accuracy = sess.run(evaluation_step, feed_dict={
                    images: validation_images,
                    labels: validation_labels})
                print('Step %d: Validation accuracy = %.1f%%'
                      % (i, validation_accuracy * 100.0))

            # Advance the batch window, wrapping around at the end.
            start = end
            if start == n_training_example:
                start = 0
            end = start + BATCH
            if end > n_training_example:
                end = n_training_example

        # Final evaluation on the held-out test split.
        test_accuracy = sess.run(evaluation_step, feed_dict={
            images: testing_images,
            labels: testing_labels})
        print("final test accuracy = %.1f%%" % (test_accuracy * 100))


if __name__ == '__main__':
    tf.app.run()
這裡為什麼是這兩個字串一直困擾著我,後來我試著查看了一下定義網路的檔案以及.ckpt檔案中所儲存的變數的名字,似乎有了一點理解。
CHECKPOINT_EXCLUDE_SCOPES = 'InceptionV3/Logits, InceptionV3/AuxLogits'
TRAINABLE_SCOPES = 'InceptionV3/Logits, InceptionV3/AuxLogits'
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow

# Open the checkpoint with TensorFlow's low-level bundled reader.
reader = pywrap_tensorflow.NewCheckpointReader(
    r"E:\PythonSpace\finetune_NET\inception_v3.ckpt")
# Dict-like mapping of variable name -> stored shape.
shape_map = reader.get_variable_to_shape_map()
# List every variable name the checkpoint contains.
for name in shape_map:
    print("variable name: ", name)
輸出結果為:
我又查看了網路檔案,D:\Anaconda2\envs\Anaconda3\Lib\site-packages\tensorflow\contrib\slim\python\slim\nets,在這個資料夾下可以看到有許多經典的網路模型檔案。
在inception_v3.py檔案中我們可以看到最後兩層是AuxLogits層和Logits層
c
從上面可以看出所有變數的名都是InceptionV3/開始,從網路檔案可以看出最後兩層為AuxLogits和Logits
finetune_NET.py執行效果截圖:
5. 提取圖片特徵(deploy.py)
在日常實驗中我們有時候不僅要看網路的最終分類結果,有時我們也會提取倒數第二個全連線層的資料作為圖片的特徵。這裡我們假設提取訓練集圖片的特徵,只是寫個例子熟悉流程
import glob
import os.path
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile
import tensorflow.contrib.slim as slim
import scipy.io as scio
# 載入inception_v3模型
import tensorflow.contrib.slim.python.slim.nets.inception_v3 as inception_v3
# Preprocessed dataset (.npy) produced by the splitting script.
INPUT_DATA = r'E:\PythonSpace\finetune_NET\flower_processed_data.npy'
# Directory holding the fine-tuned checkpoints.
TRAIN_FILE = r'E:\PythonSpace\finetune_NET\model'
# Deploy parameters.
# STEPS * BATCH = number of training images.
STEPS = 100
BATCH = 20
N_CLASSES = 5
# Fine-tuned network checkpoint to restore.
model_path = r"E:\PythonSpace\finetune_NET\model\model_step31.ckpt"
# Accumulates the extracted per-image features (filled by main()).
feat_train = []
def main(argv=None):
    """Restore the fine-tuned model and extract the 2048-d 'PreLogits'
    feature for every training image, saving them all to a .mat file."""
    # Load the preprocessed dataset; only the training split is used here
    # (the validation/test slots of the array are left untouched).
    processed_data = np.load(INPUT_DATA)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]

    # Network inputs.  labels is fed for uniformity with the training
    # script, but the forward pass does not consume it.
    images = tf.placeholder(tf.float32, [None, 229, 229, 3],
                            name="input_images")
    labels = tf.placeholder(tf.int64, [None], name="labels")

    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        # num_classes must match the fine-tuned checkpoint or restore fails.
        # is_training=False / dropout_keep_prob=1.0: pure inference.
        # end_points maps layer names to their output tensors.
        logits, end_points = inception_v3.inception_v3(
            images, num_classes=N_CLASSES, is_training=False,
            dropout_keep_prob=1.)
    feat = end_points

    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise the whole network from the fine-tuned checkpoint.
        saver.restore(sess, model_path)
        start = 0
        end = BATCH
        for i in range(STEPS):
            feats = sess.run(feat, feed_dict={
                images: training_images[start:end],
                labels: training_labels[start:end]
            })
            # 'PreLogits' is batch x 1 x 1 x 2048; squeeze to batch x 2048.
            f = feats['PreLogits']
            f = np.asarray(f)
            f = f[:, 0, 0, :]
            feat_train.extend(f)

            # Advance the batch window.
            start = end
            if start == n_training_example:
                # One full pass over the training set is done: save the
                # features as a .mat file and stop.
                scio.savemat(r"E:\PythonSpace\finetune_NET\feat_train.mat",
                             {"feat_train": feat_train})
                break
            # NOTE(review): the original (indentation-mangled) source also
            # had a 'start = 0' reset here, copied from the training loop;
            # it is unreachable/unnecessary because we break after a single
            # epoch (STEPS * BATCH equals the training-set size).
            end = start + BATCH
            if end > n_training_example:
                end = n_training_example


if __name__ == '__main__':
    tf.app.run()
這裡我們可以看出inception_v3()函式的輸入變數。
從這裡我們可以看出我們要提取的特徵儲存在end_points這個字典中的Key為'PreLogits'。
提取的特徵結果截圖:
本文是自己TensorFlow學習過程中的點滴積累,可能初學對某些地方的理解存在錯誤,懇請批評指正