Tensorflow學習筆記：讀取二進位制檔案、讀寫TFRecord檔案

阿新 • • 發佈：2019-01-02

#影象基本知識
OpenCV已經學過

#圖片操作目的：
增加圖片資料的統一性：大小與格式統一
縮小圖片資料量，防止增加開銷

#圖片操作：放大或縮小
   tf.image.resize_images(images,size)
       images:4-D陣列[batch,height,width,channels] 或者 3-D陣列[height,width,channels]的圖片資料
       size:一維的int32張量[new_height,new_width]

#圖片讀取API
   tf.WholeFileReader()將檔案全部內容作為值的讀取器
       return :讀取器例項
       read方法(file_queue)輸出是一個檔名（key）和該檔案內容（值value）

   tf.image.decode_jpeg(contents)將jpeg檔案解碼為uint8型別的張量
       return   uint8型別張量[height,width,channels]
   tf.image.decode_png(contents)將png檔案解碼為uint8或者uint16型別的張量

#圖片批量讀取示例：

def picreader(filelist):
    """Build a queue-based input pipeline that reads JPEG files in batches.

    Args:
        filelist: list of paths of the JPEG files to read.

    Returns:
        A tensor of shape [5, 200, 200, 3]: one batch of five images, each
        resized to 200x200 with three channels.
    """
    # 1. Queue of file names that feeds the reader.
    file_queue = tf.train.string_input_producer(filelist)

    # 2. Reader that returns a whole file's content as a single value.
    #    The file-name key is not needed here.
    reader = tf.WholeFileReader()
    _, raw_data = reader.read(file_queue)
    print(raw_data)

    # 3. Decode the JPEG bytes. channels=3 forces RGB output, so a grayscale
    #    file cannot contradict the fixed [200, 200, 3] shape declared below
    #    and break the batching queue at run time.
    read_result = tf.image.decode_jpeg(raw_data, channels=3)
    print(read_result)

    # 4. Bring every picture to the same size.
    read_result = tf.image.resize_images(read_result, [200, 200])

    # The static shape must be fully defined, otherwise the sample cannot be
    # put into the batching queue.
    read_result.set_shape([200, 200, 3])
    print(read_result)

    # 5. Group single samples into batches.
    read_result_batch = tf.train.batch(
        [read_result], batch_size=5, num_threads=1, capacity=7)

    # 6. Hand the batch tensor back to the caller.
    return read_result_batch

import os
import tensorflow as tf

if __name__ == "__main__":
    # List and prefix with the SAME directory. The original listed the
    # relative ".\\pic_data\\" but prefixed the names with the absolute path,
    # which only matched when the working directory was C:\Users\xie.
    pic_dir = "C:\\Users\\xie\\pic_data\\"
    dir_file_list = [pic_dir + name for name in os.listdir(pic_dir)]
    print(dir_file_list)

    read_result = picreader(dir_file_list)

    with tf.Session() as sess:

        # The coordinator manages the queue-runner threads that fill the
        # input queues in the background.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            for i in range(3):
                print(sess.run(read_result).shape)
        finally:
            # Always stop and join the reader threads, even if a run fails.
            coord.request_stop()
            coord.join(threads)

#tf的切片操作
    a = [lable,imageData1,imageData2 ...]
    lable = tf.slice(a, [0], [1])
    image = tf.slice(a, [1], [size_of_data])


#讀取二進位制檔案程式碼：
    def binreader(filelist):
    
    #1、構造檔案列表
    file_queue = tf.train.string_input_producer(filelist)

    #2、構造閱讀器進行閱讀
    reader = tf.FixedLengthRecordReader(1024 * 3 + 1)
    key, value = reader.read(file_queue)

    #3、構造解碼器進行解碼
    decoded_data = tf.decode_raw(value, tf.uint8)

    #4、分割出圖片和標籤

    lable = tf.slice(decoded_data, [0], [1])
    image = tf.slice(decoded_data, [1], [3072])

     #5、改變image大小
    image = tf.reshape(image, [32, 32, 3])

    #6、批處理
    batch_lable, batch_image = tf.train.batch([lable,image], batch_size = 10, num_threads = 1, capacity = 100)

    print(batch_lable, batch_image)

    return batch_lable, batch_image

import tensorflow as tf 
import os

if __name__ == '__main__':

    # Collect the files of the data directory whose name ends in 'bin'.
    data_dir = 'C:\\Users\\xie\\binary_data\\'
    dir_file_list = [data_dir + name for name in os.listdir(data_dir)
                     if name.endswith('bin')]

    read_result = binreader(dir_file_list)

    with tf.Session() as sess:

        # The coordinator manages the queue-runner threads that fill the
        # input queues in the background.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            print(sess.run(read_result))
        finally:
            # Always stop and join the reader threads, even if the run fails.
            coord.request_stop()
            coord.join(threads)

#tf自帶的檔案格式TFRecords
   為了將特徵值和標籤值儲存在同一個檔案內部，方便存取和移動
   用Example協議塊（一個類似字典的格式）儲存樣本
#寫入TFRecord檔案
   1、建立TFrecord儲存器
   tf.python_io.TFRecordWriter(path)
   return 檔案寫入器例項

   方法：write(string)寫入一個字串記錄（一個example）
       close()
#程式碼示例：

def binreader(filelist):
    """Build a queue-based pipeline that reads fixed-length binary records.

    Each record is 1 label byte followed by 32*32*3 image bytes. The image
    bytes are assumed to follow the CIFAR-10 binary layout (three 32x32
    channel planes stored one after another) -- confirm against the data.

    Args:
        filelist: list of paths of the binary files to read.

    Returns:
        (batch_lable, batch_image): uint8 tensors of shape [10, 1] and
        [10, 32, 32, 3].
    """
    label_bytes = 1
    height, width, depth = 32, 32, 3
    image_bytes = height * width * depth

    # 1. Queue of file names.
    file_queue = tf.train.string_input_producer(filelist)

    # 2. Reader that returns one fixed-length record per read.
    reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)
    key, value = reader.read(file_queue)

    # 3. Decode the raw record into a flat uint8 vector.
    decoded_data = tf.decode_raw(value, tf.uint8)

    # 4. Split the record into label and image bytes.
    lable = tf.slice(decoded_data, [0], [label_bytes])
    image = tf.slice(decoded_data, [label_bytes], [image_bytes])

    # 5. The pixels are stored channel-major (all of channel 0, then channel
    #    1, then channel 2). Reshaping straight to [32, 32, 3] would
    #    interleave them wrongly, so reshape to [depth, height, width] first
    #    and then move channels last.
    image = tf.transpose(
        tf.reshape(image, [depth, height, width]), [1, 2, 0])

    # 6. Group single samples into batches.
    batch_lable, batch_image = tf.train.batch(
        [lable, image], batch_size=10, num_threads=1, capacity=100)

    print(batch_lable, batch_image)

    return batch_lable, batch_image


def write_to_tfrecords(batch_lable, batch_image,
                       path="./binary_data/TFRecord/cifar.tfrecords"):
    """Store one batch of labels and images in a TFRecord file.

    Must be called while a default session is active (inside a
    ``with tf.Session():`` block) and after the queue runners were started.

    Args:
        batch_lable: tensor holding the labels of one batch; element ``[0]``
            of each row is stored as the label.
        batch_image: tensor holding the images of the same batch.
        path: destination TFRecord file.

    Raises:
        ValueError: if no default session is registered.
    """
    sess = tf.get_default_session()
    if sess is None:
        raise ValueError(
            "write_to_tfrecords needs a default session; call it inside "
            "a `with tf.Session():` block")

    # Fetch labels and images in ONE run. Evaluating the two tensors
    # separately (or once per sample) dequeues a fresh batch each time, so
    # the stored label would not belong to the stored image.
    lables, images = sess.run([batch_lable, batch_image])

    # 1. The context manager closes the writer even if a write fails.
    with tf.python_io.TFRecordWriter(path) as writer:
        # 2. Every sample is wrapped in its own Example protocol buffer.
        for i, (lable_row, image) in enumerate(zip(lables, images)):
            example = tf.train.Example(features=tf.train.Features(feature={
                "image": tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                "lable": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(lable_row[0])])),
            }))
            writer.write(example.SerializeToString())
            print("%d times of store is over" % i)

import tensorflow as tf 
import os

if __name__ == '__main__':

    # Collect the files of the data directory whose name ends in 'bin'.
    data_dir = 'C:\\Users\\xie\\binary_data\\'
    dir_file_list = [data_dir + name for name in os.listdir(data_dir)
                     if name.endswith('bin')]

    read_result = binreader(dir_file_list)

    with tf.Session() as sess:

        # The coordinator manages the queue-runner threads that fill the
        # input queues in the background.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            # No separate sess.run(read_result) here: it would dequeue a
            # batch and throw it away before anything is stored.
            print("start to store")
            write_to_tfrecords(read_result[0], read_result[1])
            print("end of store")
        finally:
            # Always stop and join the reader threads, even if storing fails.
            coord.request_stop()
            coord.join(threads)

#讀取tfrecords檔案（多了一步解析）
   tf.parse_single_example(serialized,       #讀出的內容
               features = None)              #dict字典資料，鍵為讀取的名字，值為FixedLenFeature
       return：一個鍵值對組成的字典，鍵為讀取的名字
#示例程式碼：

def binreader(filelist):
    """Build a queue-based pipeline that reads fixed-length binary records.

    Each record is 1 label byte followed by 32*32*3 image bytes. The image
    bytes are assumed to follow the CIFAR-10 binary layout (three 32x32
    channel planes stored one after another) -- confirm against the data.

    Args:
        filelist: list of paths of the binary files to read.

    Returns:
        (batch_lable, batch_image): uint8 tensors of shape [10, 1] and
        [10, 32, 32, 3].
    """
    label_bytes = 1
    height, width, depth = 32, 32, 3
    image_bytes = height * width * depth

    # 1. Queue of file names.
    file_queue = tf.train.string_input_producer(filelist)

    # 2. Reader that returns one fixed-length record per read.
    reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)
    key, value = reader.read(file_queue)

    # 3. Decode the raw record into a flat uint8 vector.
    decoded_data = tf.decode_raw(value, tf.uint8)

    # 4. Split the record into label and image bytes.
    lable = tf.slice(decoded_data, [0], [label_bytes])
    image = tf.slice(decoded_data, [label_bytes], [image_bytes])

    # 5. The pixels are stored channel-major (all of channel 0, then channel
    #    1, then channel 2). Reshaping straight to [32, 32, 3] would
    #    interleave them wrongly, so reshape to [depth, height, width] first
    #    and then move channels last.
    image = tf.transpose(
        tf.reshape(image, [depth, height, width]), [1, 2, 0])

    # 6. Group single samples into batches.
    batch_lable, batch_image = tf.train.batch(
        [lable, image], batch_size=10, num_threads=1, capacity=100)

    print(batch_lable, batch_image)

    return batch_lable, batch_image


def write_to_tfrecords(batch_lable, batch_image,
                       path="./binary_data/TFRecord/cifar.tfrecords"):
    """Store one batch of labels and images in a TFRecord file.

    Must be called while a default session is active (inside a
    ``with tf.Session():`` block) and after the queue runners were started.

    Args:
        batch_lable: tensor holding the labels of one batch; element ``[0]``
            of each row is stored as the label.
        batch_image: tensor holding the images of the same batch.
        path: destination TFRecord file.

    Raises:
        ValueError: if no default session is registered.
    """
    sess = tf.get_default_session()
    if sess is None:
        raise ValueError(
            "write_to_tfrecords needs a default session; call it inside "
            "a `with tf.Session():` block")

    # Fetch labels and images in ONE run. Evaluating the two tensors
    # separately (or once per sample) dequeues a fresh batch each time, so
    # the stored label would not belong to the stored image.
    lables, images = sess.run([batch_lable, batch_image])

    # 1. The context manager closes the writer even if a write fails.
    with tf.python_io.TFRecordWriter(path) as writer:
        # 2. Every sample is wrapped in its own Example protocol buffer.
        for i, (lable_row, image) in enumerate(zip(lables, images)):
            example = tf.train.Example(features=tf.train.Features(feature={
                "image": tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                "lable": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(lable_row[0])])),
            }))
            writer.write(example.SerializeToString())
            print("%d times of store is over" % i)

def read_from_tfrecords(
        tfrecords_path='C:\\Users\\xie\\binary_data\\TFRecord\\cifar.tfrecords'):
    """Build a queue-based pipeline that reads samples from a TFRecord file.

    Each record is parsed as an Example with a string feature ``image`` and
    an int64 feature ``lable``.

    Args:
        tfrecords_path: path of the TFRecord file to read; defaults to the
            location used by the original script.

    Returns:
        (image_batch, lable_batch): a uint8 tensor of shape [10, 32, 32, 3]
        and an int64 tensor of shape [10].
    """
    # 1. Queue of file names.
    file_queue = tf.train.string_input_producer([tfrecords_path])

    # 2. Reader for TFRecord files.
    reader = tf.TFRecordReader()

    # 3. Each value read from the queue is one serialized Example.
    key, value = reader.read(file_queue)

    # Parse the serialized Example back into its named features.
    features = tf.parse_single_example(
        value,
        features={'image': tf.FixedLenFeature([], tf.string),
                  'lable': tf.FixedLenFeature([], tf.int64)})

    # 4. Only string features need decoding; int64 features are usable as is.
    image = tf.decode_raw(features["image"], tf.uint8)
    lable = features["lable"]

    # Restore the image shape so that the sample can be batched.
    image_reshape = tf.reshape(image, [32, 32, 3])

    image_batch, lable_batch = tf.train.batch(
        [image_reshape, lable], batch_size=10, num_threads=1, capacity=10)

    return image_batch, lable_batch

import tensorflow as tf 
import os

if __name__ == '__main__':

    # The TFRecord reader uses its own path, so no listing of the binary
    # data directory is needed here. The original computed an unused file
    # list, which could fail on a missing directory for no reason.
    image_batch, lable_batch = read_from_tfrecords()

    with tf.Session() as sess:

        # The coordinator manages the queue-runner threads that fill the
        # input queues in the background.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            print(sess.run([image_batch, lable_batch]))
        finally:
            # Always stop and join the reader threads, even if the run fails.
            coord.request_stop()
            coord.join(threads)

Tensorflow學習筆記：讀取二進位制檔案、讀寫TFRecord檔案

Tensorflow學習筆記：讀取二進位制檔案、讀寫TFRecord檔案

Tensorflow學習筆記：變數作用域、模型的載入與儲存、執行緒與佇列實現多執行緒讀取樣本

VBA建立文字檔案、讀寫文字檔案

JNI中新建檔案、讀寫普通檔案和驅動檔案的方法綜述fopen/open/creat/

tensorflow學習筆記：sess.run()

python學習筆記：迭代器、生成器、yield關鍵字

Tensorflow學習筆記：資料集加工和轉化為TensorFlow專用格式——Finetuning，貓狗大戰，VGGNet的重新針對訓練

Tensorflow學習筆記：VGG16模型——Finetuning，貓狗大戰，VGGNet的重新針對訓練

Tensorflow學習筆記：VGG16訓練——Finetuning，貓狗大戰，VGGNet的重新針對訓練

TensorFlow學習筆記：檔名衝突導致報錯

Tensorflow學習筆記：多輸入線性迴歸神經網路

Tensorflow學習筆記：實現簡單的線性迴歸

Tensorflow學習筆記：Tensorflow基礎複習

js學習筆記：事件——事件流、事件處理程式、事件物件

OpenCV學習筆記：運動物體檢測、跟蹤和繪製曲線運動軌跡

Tensorflow學習筆記：基礎篇（7）——Mnist手寫集改進版（Tensorboard視覺化）

tensorflow學習筆記：卷積神經網路最終筆記

C# Path操作路徑檔案、File處理檔案、讀寫檔案：byte[];string[];string； string與byte[]轉換,File.AppendAllText()

python學習筆記（12）文件讀寫

Python學習筆記__9.1章文件讀寫

Tensorflow學習筆記：讀取二進位制檔案、讀寫TFRecord檔案

相關推薦