1. 程式人生 > >Python資料統計千萬級csv檔案

Python資料統計千萬級csv檔案

適合先依某一列分類,再依另一列分類,最後對數值求和的場景。

可依需求自行擴充。

# coding=utf-8

import bisect
import csv

import pandas as pd

# Enterprise numbers of interest.  Only the first one is reported, which is
# what the script has always printed; pass another entry of this list to
# sum_durations_by_day()/print_report() to report it as well.
ID = ['7100001', '7100018']

# Day buckets as YYYYMMDD integers: 2018-08-19..2018-08-31, every day of
# September 2018, and 2018-10-01.  The first bucket also collects every
# earlier timestamp and the last bucket every later one.
DAY_KEYS = (
    list(range(20180819, 20180832))
    + list(range(20180901, 20180931))
    + [20181001]
)

# Largest timestamp (YYYYMMDDHHMMSS read as a number) that still belongs to
# each bucket.  The open-ended last bucket has no upper bound.
_UPPER_BOUNDS = [day * 1000000 + 999999 for day in DAY_KEYS[:-1]]


def read_column(path, index):
    """Return field *index* of every row of the CSV file at *path*.

    The file is closed before returning.

    Raises:
        IndexError: if any row (including a blank line) has fewer than
            ``index + 1`` fields.
    """
    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for file objects.
    with open(path, 'r', newline='') as handle:
        return [row[index] for row in csv.reader(handle)]


def bucket_day(timestamp):
    """Return the DAY_KEYS entry that *timestamp* falls into.

    *timestamp* is anything ``float()`` accepts, compared numerically
    against the YYYYMMDD999999 upper bound of each day.  Values up to
    20180819999999 map to the first key and values above 20180930999999 map
    to the last key.

    Raises:
        ValueError: if *timestamp* cannot be converted with ``float()``.
    """
    value = float(timestamp)
    # bisect_left yields the first bound >= value, i.e. the first bucket
    # with value <= bound; past the final bound it yields len(bounds),
    # which is the index of the open-ended last bucket.
    return DAY_KEYS[bisect.bisect_left(_UPPER_BOUNDS, value)]


def sum_durations_by_day(enterprise_id, enterprise_ids, dates, durations):
    """Total the durations of one enterprise per day bucket.

    The three sequences are parallel columns: position ``i`` of each one
    describes the same record.  Only positions whose ``enterprise_ids``
    entry equals *enterprise_id* are looked at in the other two columns.

    Returns:
        dict mapping every key of DAY_KEYS to the summed ``int(duration)``
        of the matching records in that bucket (0 when there are none).

    Raises:
        IndexError: if *dates* or *durations* has no entry at a matching
            position.
        ValueError: if a matching date is not ``float()``-parsable or a
            matching duration is not ``int()``-parsable.
    """
    totals = dict.fromkeys(DAY_KEYS, 0)
    for position, current_id in enumerate(enterprise_ids):
        if current_id != enterprise_id:
            continue
        totals[bucket_day(dates[position])] += int(durations[position])
    return totals


def day_label(enterprise_id, day):
    """Return the printed label for one bucket, e.g. ``7100001-20180820:``.

    The first and last buckets are open-ended, so their labels carry the
    "and earlier" / "and later" suffixes.
    """
    if day == DAY_KEYS[0]:
        suffix = '以前'
    elif day == DAY_KEYS[-1]:
        suffix = '以後'
    else:
        suffix = ''
    return enterprise_id + '-' + str(day) + suffix + ':'


def print_report(enterprise_id, totals):
    """Print one line per day bucket: total seconds and the same in hours."""
    for day in DAY_KEYS:
        seconds = totals[day]
        print(day_label(enterprise_id, day), seconds, '秒-s;',
              seconds / 3600, '小時-h;')


def main(paths=('1.csv', '2.csv', '3.csv')):
    """Report the per-day recording time of the first enterprise in ID.

    *paths* names the three CSV files read: the enterprise number is taken
    from field 2 of the first file, the timestamp from field 3 of the
    second and the duration from field 4 of the third.
    """
    print("計算該檔案中每個企業每天錄音時長\n\n")
    enterprise_ids = read_column(paths[0], 2)
    dates = read_column(paths[1], 3)
    durations = read_column(paths[2], 4)

    print('企業編號:', ID)
    totals = sum_durations_by_day(ID[0], enterprise_ids, dates, durations)
    print_report(ID[0], totals)

    print("統計完畢")


if __name__ == "__main__":
    main()