1. 程式人生 > python分塊讀取大資料,避免記憶體不足

python分塊讀取大資料,避免記憶體不足

def read_data(file_name, chunk_size=1000):
    """Read a CSV file in fixed-size row chunks and return one DataFrame.

    The file is parsed ``chunk_size`` rows at a time and the pieces are
    concatenated at the end, so the parser never has to hold the whole
    file's intermediate parse state at once.

    Args:
        file_name: Path of the CSV file to read.
        chunk_size: Number of rows parsed per chunk (default 1000, the
            value that was previously hard-coded). Must be >= 1; pandas
            raises ``ValueError`` otherwise.

    Returns:
        A ``pandas.DataFrame`` with every row of the file and a fresh
        0..n-1 index (``ignore_index=True``).

    Raises:
        OSError: If the file cannot be opened.
        pandas.errors.EmptyDataError: If the file has no columns to parse.
    """
    # The file is opened here and the handle passed to pandas (instead of
    # the path); the original author's note says this is what allows paths
    # containing Chinese characters to be opened. The ``with`` block makes
    # sure the handle is closed even when parsing fails.
    with open(file_name, 'rb') as input_file:
        reader = pd.read_csv(input_file, chunksize=chunk_size)
        # Iterating the reader yields one DataFrame per chunk and ends
        # cleanly when the file is exhausted.
        chunks = list(reader)
    print("Iteration is stopped.")
    return pd.concat(chunks, ignore_index=True)