Python資料統計千萬級csv檔案
阿新 • • 發佈:2018-11-20
適用情境:先依某一列(如企業編號)分類,再依另一列(如日期)分類,最後對數值列(如時長)求和。
其他統計需求可自行擴充。
# coding=utf-8
"""Report the total recording duration per day for one enterprise.

Rows are paired by position across three CSV files: the enterprise ID is
taken from the third column of ``1.csv``, the timestamp from the fourth
column of ``2.csv`` and the duration from the fifth column of ``3.csv``.
Timestamps are compared numerically against day bounds of the form
``YYYYMMDD999999``. Durations are summed as integers per day bucket and
printed as seconds and as hours (seconds / 3600).

Everything up to and including ``CUTOFF_DAY`` is lumped into one leading
bucket and everything after ``LAST_DAY`` into one trailing bucket.
"""
import bisect
import csv
import datetime
import math

import pandas as pd  # noqa: F401  -- imported by the original script, not used here

# NOTE(review): the three inputs are presumably copies of one export, opened
# separately only because a csv reader can be iterated once -- confirm before
# collapsing them into a single path.
ID_CSV = '1.csv'
TIMESTAMP_CSV = '2.csv'
DURATION_CSV = '3.csv'

# Zero-based column positions inside each row.
ID_COLUMN = 2
TIMESTAMP_COLUMN = 3
DURATION_COLUMN = 4

ENTERPRISE_IDS = ['7100001', '7100018']
# Only the first enterprise is reported, as before; list more IDs here to
# report them as well.
REPORTED_IDS = ENTERPRISE_IDS[:1]

CUTOFF_DAY = datetime.date(2018, 8, 19)  # this day and earlier share one bucket
LAST_DAY = datetime.date(2018, 9, 30)    # last day that gets its own bucket

_DAY_SCALE = 1000000   # a day stamp YYYYMMDD is followed by six more digits
_DAY_END = 999999      # inclusive upper bound of a day, as in YYYYMMDD999999


def _day_stamp(day):
    """Return *day* as the integer YYYYMMDD."""
    return day.year * 10000 + day.month * 100 + day.day


def make_buckets(cutoff_day=CUTOFF_DAY, last_day=LAST_DAY):
    """Build the bucket labels and their inclusive upper bounds.

    Args:
        cutoff_day: ``datetime.date``; timestamps on or before the end of
            this day fall into the first bucket.
        last_day: ``datetime.date``; the last day with a bucket of its own.

    Returns:
        ``(labels, bounds)``. ``bounds`` is an ascending list of integers
        ``YYYYMMDD999999``, one per bounded bucket. ``labels`` has one more
        entry than ``bounds``: its last entry names the open-ended bucket
        for everything greater than ``bounds[-1]``.

    Raises:
        ValueError: if ``last_day`` is earlier than ``cutoff_day``.
    """
    if last_day < cutoff_day:
        raise ValueError('last_day must not be earlier than cutoff_day')

    one_day = datetime.timedelta(days=1)
    labels = [str(_day_stamp(cutoff_day)) + '以前']
    bounds = [_day_stamp(cutoff_day) * _DAY_SCALE + _DAY_END]

    day = cutoff_day + one_day
    while day <= last_day:
        labels.append(str(_day_stamp(day)))
        bounds.append(_day_stamp(day) * _DAY_SCALE + _DAY_END)
        day += one_day

    labels.append(str(_day_stamp(last_day + one_day)) + '以後')
    return labels, bounds


def read_records(id_path=ID_CSV, timestamp_path=TIMESTAMP_CSV,
                 duration_path=DURATION_CSV):
    """Yield ``(enterprise_id, timestamp, duration)`` string triples.

    The three files are read in lockstep, so nothing is held in memory
    beyond the current rows, and all files are closed when iteration ends.
    Iteration stops at the end of the shortest file. The platform default
    text encoding is used.

    Raises:
        IndexError: if a row has fewer columns than the one being read.
    """
    with open(id_path, 'r', newline='') as id_file, \
            open(timestamp_path, 'r', newline='') as timestamp_file, \
            open(duration_path, 'r', newline='') as duration_file:
        rows = zip(csv.reader(id_file),
                   csv.reader(timestamp_file),
                   csv.reader(duration_file))
        for id_row, timestamp_row, duration_row in rows:
            yield (id_row[ID_COLUMN],
                   timestamp_row[TIMESTAMP_COLUMN],
                   duration_row[DURATION_COLUMN])


def sum_durations_by_bucket(records, enterprise_ids, bounds):
    """Sum durations per bucket for each requested enterprise.

    Args:
        records: iterable of ``(enterprise_id, timestamp, duration)``.
        enterprise_ids: IDs to aggregate; rows with any other ID (a header
            row, for instance) are skipped without being parsed.
        bounds: ascending inclusive upper bounds from ``make_buckets``.

    Returns:
        Dict mapping each requested ID to a list of ``len(bounds) + 1``
        integer totals, the last one being the open-ended bucket.

    Raises:
        ValueError: if a matching row has a non-numeric timestamp or a
            duration that ``int()`` rejects.
    """
    totals = dict((enterprise_id, [0] * (len(bounds) + 1))
                  for enterprise_id in enterprise_ids)
    for row_id, timestamp, duration in records:
        bucket_totals = totals.get(row_id)
        if bucket_totals is None:
            continue
        value = float(timestamp)
        if math.isnan(value):
            # NaN lies in no range, so such a row is not counted.
            continue
        # bisect_left returns the first bucket whose bound is >= value,
        # i.e. previous_bound < value <= bound.
        bucket_totals[bisect.bisect_left(bounds, value)] += int(duration)
    return totals


def report_lines(enterprise_id, labels, totals):
    """Return one printable line per bucket, in bucket order.

    Each line reads ``<id>-<label>: <seconds> 秒-s; <hours> 小時-h;``.
    """
    lines = []
    for label, total in zip(labels, totals):
        parts = [enterprise_id + '-' + label + ':',
                 total, '秒-s;', total / 3600, '小時-h;']
        lines.append(' '.join(str(part) for part in parts))
    return lines


def main():
    """Read the CSV inputs and print the per-day report."""
    print("計算該檔案中每個企業每天錄音時長\n\n")
    print('企業編號:', ENTERPRISE_IDS)

    labels, bounds = make_buckets()
    totals = sum_durations_by_bucket(read_records(), REPORTED_IDS, bounds)
    for enterprise_id in REPORTED_IDS:
        for line in report_lines(enterprise_id, labels, totals[enterprise_id]):
            print(line)

    print("統計完畢")


if __name__ == "__main__":
    main()