『TensorFlow』隊列&多線程&TFRecord文件_我輩當高歌
阿新 • • 發佈:2017-06-04
gradient 函數 http who epo variable nbsp 其他 新建
TF數據讀取隊列機制詳解
-
TFR文件多線程隊列讀寫操作:
-
-
TFRecord文件寫入操作:
-
import tensorflow as tf


def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature``.

    ``Int64List`` takes an iterable for ``value``, so the scalar is placed
    in a one-element list here. For non-integer data, use ``BytesList`` /
    ``bytes_list`` in place of the int64 variants.
    """
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


num_shards = 2
instance_perPshard = 2

for i in range(num_shards):
    filename = 'FTR/data.tfrecords-%.5d-of-%.5d' % (i, num_shards)
    # Open the writer as a context manager so it is closed even if
    # building or writing an example raises.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for j in range(instance_perPshard):
            # Build the record in memory ...
            example = tf.train.Example(features=tf.train.Features(feature={
                'i': _int64_feature(i),
                'j': _int64_feature(j),
            }))
            # ... then serialize it into the shard file.
            writer.write(example.SerializeToString())
-
-
TFRecord文件讀取操作:
-
默認多線程,這個默認的多線程過程用於維護文件名隊列
# Read the TFRecord shards back.
files = ["FTR/data.tfrecords-00000-of-00002",
         "FTR/data.tfrecords-00001-of-00002"]
# Alternative: files = tf.train.match_filenames_once("FTR/data.tfrecords-*")

# string_input_producer takes the list of file names and returns a FIFOQueue
# (registering a QueueRunner for it). Shuffling/enqueueing the names and
# dequeueing files from the queue run in separate threads.
filename_queue = tf.train.string_input_producer(files, shuffle=False)

reader = tf.TFRecordReader()
# Read one raw serialized record from the file-name queue.
_, serialized_example = reader.read(filename_queue)
# Parse the serialized record into its named features.
features = tf.parse_single_example(
    serialized_example,
    features={
        'i': tf.FixedLenFeature([], tf.int64),
        'j': tf.FixedLenFeature([], tf.int64),
    })

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # The coordinator manages the queue-runner threads.
    coord = tf.train.Coordinator()
    # Start the threads that fill the file-name queue.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in range(6):
            # Each run triggers one actual read inside the session.
            print(sess.run([features['i'], features['j']]))
    finally:
        # Always stop and join the threads, even if a read fails.
        coord.request_stop()
        coord.join(threads)
-
-
TFRecord文件打包操作:
-
打包機制:
——————多線程調用前面的節點計算入隊
——————批量出隊並打包
所以不需要修改解析讀取數據過程為循環之類的,可以說很是方便
# Multi-threaded batch generation.
example_batch, label_batch = tf.train.batch(
    [example, label],
    batch_size=batch_size,
    num_threads=3,
    capacity=capacity)

# Multi-threaded shuffled batch generation.
# Shuffling means little when too few elements are in the queue, which is
# why shuffle_batch takes one extra argument, min_after_dequeue.
example_batch, label_batch = tf.train.shuffle_batch(
    [example, label],
    batch_size=batch_size,
    num_threads=3,
    capacity=capacity,
    min_after_dequeue=30)
files = ["FTR/data.tfrecords-00000-of-00002",
         "FTR/data.tfrecords-00001-of-00002"]
# Alternative: files = tf.train.match_filenames_once("FTR/data.tfrecords-*")

# string_input_producer takes the list of file names and returns a FIFOQueue
# (registering a QueueRunner for it). Shuffling/enqueueing the names and
# dequeueing files from the queue run in separate threads.
filename_queue = tf.train.string_input_producer(files, shuffle=False)

reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
    serialized_example,
    features={
        'i': tf.FixedLenFeature([], tf.int64),
        'j': tf.FixedLenFeature([], tf.int64),
    })

# 'i' stands in for the example (e.g. an image), 'j' for its label.
example, label = features['i'], features['j']

batch_size = 2
capacity = 1000 + 3 * batch_size

# Single examples are enqueued and whole batches are dequeued; several
# threads can run the enqueue op at the same time.
example_batch, label_batch = tf.train.batch(
    [example, label],
    batch_size=batch_size,
    num_threads=3,
    capacity=capacity)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # The coordinator manages the queue-runner threads.
    coord = tf.train.Coordinator()
    # Start the threads that fill the queues.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in range(3):
            cur_example_batch, cur_label_batch = sess.run(
                [example_batch, label_batch])
            print(cur_example_batch, cur_label_batch)
    finally:
        # Always stop and join the threads, even if a run fails.
        coord.request_stop()
        coord.join(threads)
這個輸出每一行前為image(代指),後為label,第一行的數據對實際為0-0,0-1:
[0 0] [0 1]
[1 1] [0 1]
[0 0] [0 1]
-
圖片文件使用TFR讀寫測試:
read的二進制數據直接進行_bytes_feature化就可以寫入文件,使用tf.string類型讀出圖片數據後可以直接decode解碼之(推測tf中string對應二進制數據類型)。
把一張圖片寫入TFR中:
import tensorflow as tf
import matplotlib.pyplot as plt


def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature``."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature``."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


# Read the still-encoded image bytes; the context manager closes the file
# handle, which the bare ``FastGFile(...).read()`` form leaves open.
with tf.gfile.FastGFile('123123.jpeg', 'rb') as image_file:
    img_raw = image_file.read()

filename = 'FTR/image.tfrecords'
# The writer is closed on exit from the block, even if the write raises.
with tf.python_io.TFRecordWriter(filename) as writer:
    example = tf.train.Example(features=tf.train.Features(feature={
        'image': _bytes_feature(img_raw),
        'label': _int64_feature(1),
    }))
    writer.write(example.SerializeToString())
從TFR中讀取圖片數據並解碼繪制出來:
# Queue of input file names (a single TFRecord file here).
filename_queue = tf.train.string_input_producer(['FTR/image.tfrecords'],
                                                shuffle=False)

reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
    serialized_example,
    features={
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })

# The 'image' feature holds the encoded JPEG bytes; decode them to pixels.
img = tf.image.decode_jpeg(features['image'])

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # The coordinator manages the queue-runner threads.
    coord = tf.train.Coordinator()
    # Start the threads that fill the file-name queue.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # To fetch the raw features instead:
        # img_raw, label = sess.run([features['image'], features['label']])
        image = sess.run(img)
        plt.imshow(image)
        plt.show()
    finally:
        # Always stop and join the threads, even if decoding or plotting fails.
        coord.request_stop()
        coord.join(threads)
-
圖片文件直接使用隊列讀寫操作:
僅僅示範了維護圖片文件名隊列的讀寫,沒有過多的其他操作
reader = tf.WholeFileReader():新的讀取器,它會把整個文件的內容作為一個值(value)讀出、文件名作為鍵(key),因此可以當作泛用性的二進制文件讀取器使用
import tensorflow as tf

with tf.Session() as sess:
    # The image files to read.
    filename = ['123.png', '123123.jpeg']
    # string_input_producer builds a queue of file names; with num_epochs=5
    # the list is served five times and then the queue is closed.
    filename_queue = tf.train.string_input_producer(filename, shuffle=False,
                                                    num_epochs=5)
    # The reader pulls from the file-name queue via reader.read. Note that
    # this is a different reader from the TFRecord examples.
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    # string_input_producer defines an epoch counter variable (because
    # num_epochs is set); it has to be initialized.
    tf.local_variables_initializer().run()
    # The queue is only filled once start_queue_runners has been called.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    i = 0
    try:
        while not coord.should_stop():
            # Fetch the next file's bytes and save them unmodified, so the
            # .jpg suffix does not necessarily match the actual format.
            image_data = sess.run(value)
            i += 1
            with open('test_%d.jpg' % i, 'wb') as f:
                f.write(image_data)
    except tf.errors.OutOfRangeError:
        # Raised once all epochs are consumed; this is the normal way out of
        # the loop, which previously ended in an uncaught exception.
        pass
    finally:
        coord.request_stop()
        coord.join(threads)
-
書上的隊列文件使用樣例:
文件名隊列創建->讀取解析文件->打包解析好的文件->多線程啟動圖訓練(多線程指被使用的部分其實還是文件讀取)
import tensorflow as tf

# --- Build the file list -----------------------------------------------
files = tf.train.match_filenames_once("Records/output.tfrecords")
filename_queue = tf.train.string_input_producer(files, shuffle=False)

# --- Parse the data stored in the TFRecord file ------------------------
# Read one serialized record.
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)

# Parse the record that was read.
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw': tf.FixedLenFeature([], tf.string),
        'pixels': tf.FixedLenFeature([], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64),
    })

decoded_images = tf.decode_raw(features['image_raw'], tf.uint8)
retyped_images = tf.cast(decoded_images, tf.float32)
labels = tf.cast(features['label'], tf.int32)
# pixels = tf.cast(features['pixels'], tf.int32)
images = tf.reshape(retyped_images, [784])

# --- Pack the examples into batches of 100 -----------------------------
min_after_dequeue = 10000
batch_size = 100
capacity = min_after_dequeue + 3 * batch_size

image_batch, label_batch = tf.train.shuffle_batch(
    [images, labels],
    batch_size=batch_size,
    capacity=capacity,
    min_after_dequeue=min_after_dequeue)


# --- Train the model ----------------------------------------------------
def inference(input_tensor, weights1, biases1, weights2, biases2):
    """Forward pass: one ReLU hidden layer followed by a linear output layer."""
    layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(layer1, weights2) + biases2


# Model parameters.
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500
REGULARAZTION_RATE = 0.0001
TRAINING_STEPS = 5000

weights1 = tf.Variable(
    tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))

weights2 = tf.Variable(
    tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

y = inference(image_batch, weights1, biases1, weights2, biases2)

# Cross entropy and its mean over the batch.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=y, labels=label_batch)
cross_entropy_mean = tf.reduce_mean(cross_entropy)

# Loss = mean cross entropy + L2 regularization of both weight matrices.
regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
regularaztion = regularizer(weights1) + regularizer(weights2)
loss = cross_entropy_mean + regularaztion

# Optimize the loss.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# Initialize the session and start training.
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # match_filenames_once keeps the matched file list in a *local*
    # variable, which global_variables_initializer does not cover.
    tf.local_variables_initializer().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Training loop.
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                print("After %d training step(s), loss is %g " %
                      (i, sess.run(loss)))
            sess.run(train_step)
    finally:
        # Always stop and join the input threads, even if training fails.
        coord.request_stop()
        coord.join(threads)
『TensorFlow』隊列&多線程&TFRecord文件_我輩當高歌