Tensorflow學習筆記:讀取二進位制檔案、讀寫TFRecord檔案
#影象基本知識
OpenCV已經學過
#圖片操作目的:
增加圖片資料的統一性:大小與格式統一
縮小圖片資料量,防止增加開銷
#圖片操作:放大或縮小
tf.image.resize_images(images,size)
images:4-D張量[batch,height,width,channels] 或者 3-D張量[height,width,channels]的圖片資料
size:一維的int32張量[new_height,new_width]
#圖片讀取API
tf.WholeFileReader()將檔案全部內容作為值的讀取器
return :讀取器例項
read方法(file_queue)輸出是一個檔名(key)和該檔案內容(值value)
tf.image.decode_jpeg(contents)將jpeg檔案解碼為uint8型別的張量
return uint8型別張量[height,width,channels]
tf.image.decode_png(contents)將png檔案解碼為uint8或者uint16型別的張量
#圖片批量讀取示例:
import os
import tensorflow as tf


def picreader(filelist):
    """Build a queue-based pipeline that reads JPEG files in batches.

    Args:
        filelist: list of picture file paths to read.

    Returns:
        The batched read result: a tensor of 5 images, each resized to
        200x200 with 3 channels.
    """
    # 1. Build the filename queue.
    file_queue = tf.train.string_input_producer(filelist)
    # 2. Build a whole-file reader; read() yields (file name, file content).
    reader = tf.WholeFileReader()
    key, raw_data = reader.read(file_queue)
    print(raw_data)
    # 3. Decode the JPEG bytes. channels=3 forces a 3-channel result so the
    #    decoded image always agrees with the static shape fixed below
    #    (a grayscale file would otherwise decode to a single channel).
    read_result = tf.image.decode_jpeg(raw_data, channels=3)
    print(read_result)
    # 4. Bring every picture to the same size.
    read_result = tf.image.resize_images(read_result, [200, 200])
    # Fix the sample shape, otherwise it cannot be put into the batch queue.
    read_result.set_shape([200, 200, 3])
    print(read_result)
    # 5. Batch the samples.
    read_result_batch = tf.train.batch([read_result],
                                       batch_size=5,
                                       num_threads=1,
                                       capacity=7)
    # 6. Return the data.
    return read_result_batch


if __name__ == "__main__":
    # List and prefix the SAME directory; the original listed ".\\pic_data\\"
    # but prefixed the names with the absolute path, which only works when
    # the current working directory happens to be C:\Users\xie.
    pic_dir = "C:\\Users\\xie\\pic_data\\"
    dir_file_list = [pic_dir + name for name in os.listdir(pic_dir)]
    print(dir_file_list)
    read_result = picreader(dir_file_list)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            for _ in range(3):
                print(sess.run(read_result).shape)
        finally:
            # Always stop and join the reader threads, even if a run fails.
            coord.request_stop()
            coord.join(threads)

# tf slice operation
# a = [lable, imageData1, imageData2 ...]
# Illustrative slicing of a flat record `a` (pseudo-code: `a` and
# `size_of_data` are placeholders, so these lines are not executable as-is):
#   lable = tf.slice(a, [0], [1])
#   image = tf.slice(a, [1], [size_of_data])

# Code for reading binary files:
import os
import tensorflow as tf


def binreader(filelist):
    """Build a queue-based pipeline that reads fixed-length binary records.

    Each record is 1024 * 3 + 1 bytes: the first byte is sliced off as the
    label and the remaining 3072 bytes are reshaped to a [32, 32, 3] image.

    Args:
        filelist: list of binary file paths to read.

    Returns:
        (batch_lable, batch_image): tensors holding a batch of 10 labels and
        the matching batch of 10 images.
    """
    # 1. Build the filename queue.
    file_queue = tf.train.string_input_producer(filelist)
    # 2. Build the reader and read one record.
    reader = tf.FixedLengthRecordReader(1024 * 3 + 1)
    key, value = reader.read(file_queue)
    # 3. Decode the raw bytes.
    decoded_data = tf.decode_raw(value, tf.uint8)
    # 4. Split the record into label and picture.
    lable = tf.slice(decoded_data, [0], [1])
    image = tf.slice(decoded_data, [1], [3072])
    # 5. Change the image shape.
    image = tf.reshape(image, [32, 32, 3])
    # 6. Batch the samples.
    batch_lable, batch_image = tf.train.batch([lable, image],
                                              batch_size=10,
                                              num_threads=1,
                                              capacity=100)
    print(batch_lable, batch_image)
    return batch_lable, batch_image


if __name__ == '__main__':
    # Get the file list under the target path.
    data_dir = 'C:\\Users\\xie\\binary_data\\'
    # Keep only the files whose name ends in 'bin'.
    dir_file_list = [data_dir + name for name in os.listdir(data_dir)
                     if name.endswith('bin')]
    read_result = binreader(dir_file_list)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            print(sess.run(read_result))
        finally:
            # Always stop and join the reader threads, even if the run fails.
            coord.request_stop()
            coord.join(threads)
#tf自帶的檔案格式TFRecords
為了將特徵值和標籤值儲存在一個檔案內部,方便存取和移動
用Example協議塊(一個類似字典的格式)儲存樣本
#寫入TFRecord檔案
1、建立TFrecord儲存器
tf.python_io.TFRecordWriter(path)
return 檔案寫入器例項
方法:write(string)寫入一個字串記錄(一個example)
close()
#程式碼示例:
def binreader(filelist):
    """Read fixed-length binary records and return batched labels and images.

    A record is 1024 * 3 + 1 bytes long. Its first byte becomes the label and
    the following 3072 bytes become a [32, 32, 3] image.

    :param filelist: list of binary file paths to read
    :return: (batch_lable, batch_image), each holding 10 samples
    """
    # Queue of file names feeding the reader.
    filename_queue = tf.train.string_input_producer(filelist)

    # One read() call yields one fixed-length record.
    record_reader = tf.FixedLengthRecordReader(1024 * 3 + 1)
    _, record = record_reader.read(filename_queue)

    # Raw bytes -> flat uint8 vector.
    flat_bytes = tf.decode_raw(record, tf.uint8)

    # Label is the leading byte; the picture is everything after it.
    lable = tf.slice(flat_bytes, [0], [1])
    flat_image = tf.slice(flat_bytes, [1], [3072])
    image = tf.reshape(flat_image, [32, 32, 3])

    # Group single samples into batches.
    batch_lable, batch_image = tf.train.batch(
        [lable, image],
        batch_size=10,
        num_threads=1,
        capacity=100,
    )
    print(batch_lable, batch_image)
    return batch_lable, batch_image
def write_to_tfrecords(batch_lable, batch_image):
    """Store the labels and images of one batch in a TFRecord file.

    Must be called while a default ``tf.Session`` is active, because the
    tensors are evaluated here.

    :param batch_lable: tensor holding the labels of one batch
    :param batch_image: tensor holding the images of the same batch
    :raises ValueError: if no default session is registered
    """
    session = tf.get_default_session()
    if session is None:
        raise ValueError("write_to_tfrecords requires a default tf.Session")
    # Fetch both tensors in ONE run call. When they come from a batching
    # queue, every separate evaluation dequeues a new batch, so calling
    # .eval() per tensor and per index pairs images with labels taken from
    # different batches.
    lables, images = session.run([batch_lable, batch_image])

    # 1. Build the TFRecord writer.
    writer = tf.python_io.TFRecordWriter("./binary_data/TFRecord/cifar.tfrecords")
    try:
        # 2. Write every sample; each picture needs its own Example protocol.
        for i, (lable_row, image_array) in enumerate(zip(lables, images)):
            # tobytes() is the non-deprecated spelling of tostring().
            image = image_array.tobytes()
            # Convert the NumPy scalar to a plain int for Int64List.
            lable = int(lable_row[0])
            # Build the Example.
            example = tf.train.Example(features=tf.train.Features(feature={
                "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
                "lable": tf.train.Feature(int64_list=tf.train.Int64List(value=[lable]))}))
            writer.write(example.SerializeToString())
            print("%d times of store is over" % i)
    finally:
        # Close the file even if a sample fails to serialize.
        writer.close()
import tensorflow as tf
import os
if __name__ == '__main__':
    # Get the file list under the target path.
    dir_file_list = os.listdir('C:\\Users\\xie\\binary_data\\')
    # Keep only the files whose name ends in 'bin'.
    dir_file_list = ['C:\\Users\\xie\\binary_data\\' + i for i in dir_file_list
                     if i[-3:] == 'bin']
    read_result = binreader(dir_file_list)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            # The original ran sess.run(read_result) here and threw the
            # result away, which consumed a batch that was never stored;
            # write_to_tfrecords evaluates the tensors itself.
            print("start to store")
            write_to_tfrecords(read_result[0], read_result[1])
            print("end of store")
        finally:
            # Always stop and join the reader threads, even if storing fails.
            coord.request_stop()
            coord.join(threads)
#讀取tfrecords檔案(多了一步解析)
tf.parse_single_example(serialized, #讀出的內容
features = None) #dict字典資料,鍵為讀取的名字,值為FixedLenFeature
return:一個鍵值對組成的字典,鍵為讀取的名字
#示例程式碼:
def binreader(filelist):
    """Return batched (label, image) tensors read from fixed-length records.

    Every record spans 1024 * 3 + 1 bytes; byte 0 is taken as the label and
    the next 3072 bytes are reshaped into a [32, 32, 3] image.

    :param filelist: list of binary file paths to read
    :return: (batch_lable, batch_image), batches of 10 samples each
    """
    # File-name queue that drives the record reader.
    names = tf.train.string_input_producer(filelist)

    # Pull one fixed-length record and decode it to uint8 values.
    fixed_reader = tf.FixedLengthRecordReader(1024 * 3 + 1)
    _, raw_record = fixed_reader.read(names)
    record_values = tf.decode_raw(raw_record, tf.uint8)

    # First value -> label, remaining 3072 values -> picture.
    lable = tf.slice(record_values, [0], [1])
    pixels = tf.slice(record_values, [1], [3072])
    image = tf.reshape(pixels, [32, 32, 3])

    # Assemble batches.
    batch_lable, batch_image = tf.train.batch(
        [lable, image], batch_size=10, num_threads=1, capacity=100)
    print(batch_lable, batch_image)
    return batch_lable, batch_image
def write_to_tfrecords(batch_lable, batch_image):
    """Store the labels and images of one batch in a TFRecord file.

    Must be called while a default ``tf.Session`` is active, because the
    tensors are evaluated here.

    :param batch_lable: tensor holding the labels of one batch
    :param batch_image: tensor holding the images of the same batch
    :raises ValueError: if no default session is registered
    """
    session = tf.get_default_session()
    if session is None:
        raise ValueError("write_to_tfrecords requires a default tf.Session")
    # Fetch both tensors in ONE run call. When they come from a batching
    # queue, every separate evaluation dequeues a new batch, so calling
    # .eval() per tensor and per index pairs images with labels taken from
    # different batches.
    lables, images = session.run([batch_lable, batch_image])

    # 1. Build the TFRecord writer.
    writer = tf.python_io.TFRecordWriter("./binary_data/TFRecord/cifar.tfrecords")
    try:
        # 2. Write every sample; each picture needs its own Example protocol.
        for i, (lable_row, image_array) in enumerate(zip(lables, images)):
            # tobytes() is the non-deprecated spelling of tostring().
            image = image_array.tobytes()
            # Convert the NumPy scalar to a plain int for Int64List.
            lable = int(lable_row[0])
            # Build the Example.
            example = tf.train.Example(features=tf.train.Features(feature={
                "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
                "lable": tf.train.Feature(int64_list=tf.train.Int64List(value=[lable]))}))
            writer.write(example.SerializeToString())
            print("%d times of store is over" % i)
    finally:
        # Close the file even if a sample fails to serialize.
        writer.close()
def read_from_tfrecords():
    """Build a pipeline that reads batched samples back from the TFRecord file.

    Each serialized Example is parsed into its 'image' (string) and 'lable'
    (int64) features; the image bytes are decoded to uint8 and reshaped to
    [32, 32, 3].

    :return: (image_batch, lable_batch), batches of 10 samples each
    """
    # Queue holding the single TFRecord file name.
    record_path = 'C:\\Users\\xie\\binary_data\\TFRecord\\cifar.tfrecords'
    filename_queue = tf.train.string_input_producer([record_path])

    # Each read() yields one serialized Example.
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(filename_queue)

    # Parsing is the extra step compared with reading plain binary files.
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'lable': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    # Only string-typed features need decoding.
    pixels = tf.decode_raw(parsed["image"], tf.uint8)
    lable = parsed["lable"]
    image_reshape = tf.reshape(pixels, [32, 32, 3])

    image_batch, lable_batch = tf.train.batch(
        [image_reshape, lable],
        batch_size=10,
        num_threads=1,
        capacity=10,
    )
    return image_batch, lable_batch
import tensorflow as tf
import os
if __name__ == '__main__':
    # The original also listed the *.bin files of the binary_data directory
    # here, but the result was never used: read_from_tfrecords() reads a
    # fixed TFRecord path. The unused listing is dropped so a missing
    # directory cannot fail the script for no reason.
    image_batch, lable_batch = read_from_tfrecords()
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        try:
            print(sess.run([image_batch, lable_batch]))
        finally:
            # Always stop and join the reader threads, even if the run fails.
            coord.request_stop()
            coord.join(threads)