Numpy學習(3):將mnist資料檔案讀入到資料結構(numpy陣列)中
阿新 • • 發佈:2019-01-03
"""Parse the MNIST IDX binary files into NumPy arrays."""

import struct

import numpy as np


def _read_ubyte_payload(buffers, offset, count):
    """Return `count` unsigned bytes of `buffers`, starting at `offset`, as a 1-D array.

    The result is a writable array of NumPy's default integer dtype, i.e. the
    same dtype NumPy picks when it converts a sequence of Python ints.

    Raises:
        struct.error: if `buffers` holds fewer than `count` bytes after `offset`.
    """
    available = len(buffers) - offset
    if available < count:
        # Keep struct.error (what struct.unpack_from raises on a short buffer)
        # so callers that already catch it for truncated files keep working.
        raise struct.error(
            'payload requires %d bytes but only %d are available'
            % (count, available)
        )
    # frombuffer reads the bytes in place; going through struct.unpack_from
    # would first materialise one Python int object per pixel.
    raw = np.frombuffer(buffers, dtype=np.uint8, count=count, offset=offset)
    # astype copies, which also makes the result writable (the view is not).
    return raw.astype(np.int_)


def loadImageSet(filename):
    """Load an IDX3 image file (e.g. MNIST ``train-images.idx3-ubyte``).

    Args:
        filename: path of the IDX3 file to read.

    Returns:
        A tuple ``(imgs, head)``: ``imgs`` is a 2-D array with one flattened
        image per row (shape ``[image count, rows * cols]``); ``head`` is the
        tuple of the four big-endian unsigned 32-bit header integers.

    Raises:
        OSError: if the file cannot be opened or read.
        struct.error: if the file is shorter than its header announces.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    header_fmt = '>IIII'
    head = struct.unpack_from(header_fmt, buffers, 0)  # first 4 integers
    offset = struct.calcsize(header_fmt)  # pixel data starts right after them

    # The IDX3 header stores the row count before the column count.
    imgNum, rows, cols = head[1], head[2], head[3]
    pixels_per_image = rows * cols

    imgs = _read_ubyte_payload(buffers, offset, imgNum * pixels_per_image)
    return imgs.reshape(imgNum, pixels_per_image), head


def loadLabelSet(filename):
    """Load an IDX1 label file (e.g. MNIST ``train-labels.idx1-ubyte``).

    Args:
        filename: path of the IDX1 file to read.

    Returns:
        A tuple ``(labels, head)``: ``labels`` is a 1-D array with one label
        per item; ``head`` is the tuple of the two big-endian unsigned 32-bit
        header integers.

    Raises:
        OSError: if the file cannot be opened or read.
        struct.error: if the file is shorter than its header announces.
    """
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    header_fmt = '>II'
    head = struct.unpack_from(header_fmt, buffers, 0)  # first 2 integers
    labelNum = head[1]
    offset = struct.calcsize(header_fmt)  # label data starts right after them

    labels = _read_ubyte_payload(buffers, offset, labelNum)
    return labels, head


if __name__ == "__main__":
    file1 = 'E:/pythonProjects/dataSets/mnist/train-images.idx3-ubyte'
    file2 = 'E:/pythonProjects/dataSets/mnist/train-labels.idx1-ubyte'

    imgs, data_head = loadImageSet(file1)
    print('data_head:', data_head)
    print(type(imgs))
    print('imgs_array:', imgs)
    # Print one image as a 2-D grid of pixel values; the digit is roughly
    # recognisable from the printed numbers.
    print(np.reshape(imgs[1, :], [data_head[2], data_head[3]]))

    print('----------我是分割線-----------')

    labels, labels_head = loadLabelSet(file2)
    print('labels_head:', labels_head)
    print(type(labels))
    print(labels)