1. 程式人生 > Numpy學習(3):將mnist資料檔案讀入到資料結構(numpy陣列)中

Numpy學習(3):將mnist資料檔案讀入到資料結構(numpy陣列)中

'''
    使用python解析二進位制檔案
'''
import numpy as np
import struct

def loadImageSet(filename):
    """Load an IDX image file (e.g. MNIST ``train-images.idx3-ubyte``).

    The file must start with four big-endian unsigned 32-bit integers
    (per the IDX format: magic number, image count, rows, columns),
    followed by one unsigned byte per pixel.

    Args:
        filename: Path of the binary image file.

    Returns:
        A tuple ``(imgs, head)``. ``imgs`` is a 2-D integer numpy array
        holding one flattened image per row, i.e. shape
        ``[head[1], head[2] * head[3]]``. ``head`` is the 4-tuple of
        header integers exactly as read from the file.

    Raises:
        OSError: If the file cannot be opened or read.
        struct.error: If the file is too short for the header, or for the
            amount of pixel data announced by the header.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    header_fmt = '>IIII'
    head = struct.unpack_from(header_fmt, buffers, 0)
    offset = struct.calcsize(header_fmt)  # pixel data starts after the header

    imgNum, rows, cols = head[1], head[2], head[3]
    pixel_count = imgNum * rows * cols

    # Keep raising struct.error for truncated files, as unpack_from did.
    available = len(buffers) - offset
    if available < pixel_count:
        raise struct.error(
            'image file needs %d pixel bytes after the header, found %d'
            % (pixel_count, available)
        )

    # Read the bytes directly instead of building a huge tuple of Python
    # ints; trailing bytes beyond pixel_count are ignored, as before.
    raw = np.frombuffer(buffers, dtype=np.uint8, count=pixel_count,
                        offset=offset)

    # Cast to numpy's default integer type so callers get the same
    # (writable, non-wrapping) dtype the tuple-based conversion produced.
    imgs = raw.astype(np.int_).reshape(imgNum, rows * cols)

    return imgs, head


def loadLabelSet(filename):
    """Load an IDX label file (e.g. MNIST ``train-labels.idx1-ubyte``).

    The file must start with two big-endian unsigned 32-bit integers
    (per the IDX format: magic number, label count), followed by one
    unsigned byte per label.

    Args:
        filename: Path of the binary label file.

    Returns:
        A tuple ``(labels, head)``. ``labels`` is a 1-D integer numpy array
        of length ``head[1]``. ``head`` is the 2-tuple of header integers
        exactly as read from the file.

    Raises:
        OSError: If the file cannot be opened or read.
        struct.error: If the file is too short for the header, or for the
            number of labels announced by the header.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    header_fmt = '>II'
    head = struct.unpack_from(header_fmt, buffers, 0)
    offset = struct.calcsize(header_fmt)  # label data starts after the header

    labelNum = head[1]

    # Keep raising struct.error for truncated files, as unpack_from did.
    available = len(buffers) - offset
    if available < labelNum:
        raise struct.error(
            'label file needs %d label bytes after the header, found %d'
            % (labelNum, available)
        )

    # Read the bytes directly instead of unpacking into a tuple of Python
    # ints; trailing bytes beyond labelNum are ignored, as before.
    raw = np.frombuffer(buffers, dtype=np.uint8, count=labelNum,
                        offset=offset)

    # Cast to numpy's default integer type to keep the dtype callers
    # previously received; astype also yields a writable copy.
    labels = raw.astype(np.int_).reshape(labelNum)

    return labels, head


if __name__ == "__main__":
    # Hard-coded locations of the MNIST training image and label files.
    file1 = 'E:/pythonProjects/dataSets/mnist/train-images.idx3-ubyte'
    file2 = 'E:/pythonProjects/dataSets/mnist/train-labels.idx1-ubyte'

    # Images: print the header, the container type and the pixel matrix.
    imgs, data_head = loadImageSet(file1)
    print('data_head:', data_head)
    print(type(imgs))
    print('imgs_array:', imgs)

    # View the second image as a 28x28 grid; the digit is roughly
    # recognisable from the printed pixel values.
    second_image = imgs[1].reshape(28, 28)
    print(second_image)

    print('----------我是分割線-----------')

    # Labels: print the header, the container type and the label vector.
    labels, labels_head = loadLabelSet(file2)
    print('labels_head:', labels_head)
    print(type(labels))
    print(labels)