程式人生 > 遍歷win10文件夾並解析json文件,按照json格式存入mongo數據庫(基於python 3.6)

遍歷win10文件夾並解析json文件,按照json格式存入mongo數據庫(基於python 3.6)

dep IT os.path efault gpo dir ren ica not

import re
import json
from pymongo import MongoClient
import os

# def Write_json(data):
# open_Json = open("1.json", "a")
# Write_json = json.dumps(data)
# open_Json.write(Write_json)
# open_Json.close()

def load_Mongo(Address, port, Process_data, Name):
    """Insert a document into ``portal.typhoons`` and print the stored ids.

    Args:
        Address: Host name or IP address of the MongoDB server.
        port: TCP port of the MongoDB server.
        Process_data: The document (a dict) to store. A list of documents
            is also accepted and stored with a single bulk insert, which
            mirrors what the legacy ``Collection.insert`` call allowed.
        Name: Value of the ``ename`` field used to look the stored
            document(s) up again for the confirmation output.

    After inserting, every document in the collection whose ``ename``
    equals *Name* is printed with its ObjectID, so earlier inserts that
    share the same name are listed as well.
    """
    client = MongoClient(Address, port)  # server address + port
    try:
        collection = client.portal.typhoons  # database "portal", collection "typhoons"
        # Collection.insert() is the legacy API (removed in PyMongo 4);
        # insert_one / insert_many are the supported replacements.
        if isinstance(Process_data, list):
            collection.insert_many(Process_data)
        else:
            collection.insert_one(Process_data)
        for stored in collection.find({"ename": Name}):
            object_id = str(stored['_id'])
            print('已存放,' + 'ObjectID:' + object_id)
    finally:
        # Release the connection pool even if the insert or query fails.
        client.close()
# Translation table that deletes carriage returns, newlines and tabs.
_CONTROL_CHARS = str.maketrans('', '', '\r\n\t')

# Long status texts that are stored under a short type code; any other
# status text is stored unchanged.
_STATUS_CODES = {
    'TROPICAL STORM': 'TS',
    'TROPICAL DEPRESSION': 'TD',
}

# Leading columns of a data row (ADV, LAT, LON, TIME, WIND, PR); everything
# after them is the free-text status.
_FIXED_COLUMNS = 6


def _strip_controls(text):
    """Return *text* with every CR, LF and tab character removed."""
    return text.translate(_CONTROL_CHARS)


def _build_track_point(fields):
    """Turn the space-separated columns of one data row into a track entry."""
    _adv, latitude, longitude, time, wind, pressure = fields[:_FIXED_COLUMNS]
    status = _strip_controls(' '.join(fields[_FIXED_COLUMNS:]))
    # The literal string 'null' (not None) marks fields the source file
    # does not provide; it is part of the stored document format.
    return {
        'forecast': 'null',
        'type': _STATUS_CODES.get(status, status),
        'speed': wind,
        'moveDirection': 'null',
        'longitude': longitude,
        'latitude': latitude,
        'radius10': 'null',
        'time': time,
        'radius7': 'null',
        'pressure': pressure,
        'power': 'null',
        'des': '',
        'moveSpeed': 'null',
    }


def _parse_track_file(path):
    """Read one track file and return the document that will be stored."""
    start_dates = []
    names = []
    track = []
    with open(path, encoding='utf-8') as handle:
        for line_no, line in enumerate(handle, start=1):
            if ':' in line:
                # Any line containing a colon is treated as a date line,
                # wherever it appears; the text between the first and the
                # second colon is kept with all whitespace removed.
                value = _strip_controls(line.split(':')[1]).replace(' ', '')
                start_dates.append(value)
            elif line_no == 2:
                names.append(_strip_controls(line))
            elif line_no > 3:
                # Line 3 is the column header and carries no data.
                if not line.strip():
                    continue  # tolerate blank lines, e.g. at the end of the file
                # Drop the line terminator first so a row with exactly six
                # columns does not keep "\n" glued to its last value.
                fields = [part for part in line.rstrip('\r\n').split(' ') if part]
                if len(fields) < _FIXED_COLUMNS:
                    raise ValueError(
                        '%s, line %d: expected at least %d columns, found %d'
                        % (path, line_no, _FIXED_COLUMNS, len(fields))
                    )
                track.append(_build_track_point(fields))
    if not start_dates:
        raise ValueError('%s: no date line (a line containing ":") found' % path)
    if not names:
        raise ValueError('%s: no name on line 2' % path)
    return {
        'ename': names[0],
        'is_current': 'null',
        'enddate': 'null',
        'name': 'null',
        'sn': 'null',
        'land': [],
        'startdate': start_dates[0],
        'track': track,
    }


def loadFont(address, port, Json_File):
    """Parse one track text file and store the result in MongoDB.

    The file is read line by line:

    * a line containing ``:`` supplies the start date (first such line wins);
    * line 2 supplies the name stored as ``ename``;
    * line 3 is a column header and is skipped;
    * every later non-blank line is a data row of space-separated columns,
      the first six being ADV, LAT, LON, TIME, WIND and PR, and the
      remainder forming the status text.

    The assembled document is printed and then passed to ``load_Mongo``.

    Args:
        address: MongoDB host, forwarded to ``load_Mongo``.
        port: MongoDB port, forwarded to ``load_Mongo``.
        Json_File: Path of the UTF-8 encoded track file to read.

    Raises:
        ValueError: If the file has no date line, no name line, or a data
            row with fewer than six columns.
    """
    document = _parse_track_file(Json_File)
    print(document)
    load_Mongo(address, port, document, document['ename'])


# Shared accumulator: traverse() appends every file it finds here, and the
# script entry point reads it afterwards. It is never cleared, so calling
# traverse() more than once keeps adding to the same list.
List = []


def traverse(json_Dir):
    """Recursively collect the paths of all files below *json_Dir*.

    Entries are visited in sorted name order so the result is reproducible;
    ``os.listdir`` itself returns names in arbitrary order. Sub-folders are
    descended into at the point where they occur in that order.

    Args:
        json_Dir: Path of the folder to scan.

    Returns:
        The module-level ``List`` object, extended with the full path of
        every non-directory entry that was found.
    """
    for entry in sorted(os.listdir(json_Dir)):
        tmp_path = os.path.join(json_Dir, entry)
        if os.path.isdir(tmp_path):
            traverse(tmp_path)  # folder: descend
        else:
            List.append(tmp_path)  # file: record its path
    return List

if __name__ == "__main__":
    mongo_address = '127.0.0.1'  # database host
    mongo_port = 27017  # database port
    # Raw string: "\T" and "\D" are not valid escape sequences, so the
    # backslashes of the Windows path must not be interpreted.
    json_Path = r'E:\Typhoon_data\Data'  # root folder holding the data files
    traverse(json_Path)  # walk the folder tree; file paths end up in List
    for json_file in List:
        loadFont(mongo_address, mongo_port, json_file)  # parse and store in MongoDB

遍歷win10文件夾並解析json文件,按照json格式存入mongo數據庫(基於python 3.6)