python爬蟲系列(4.1-關於檔案的寫入)
一、關於python中json模組的回顧
1、json.dumps():將python中字典轉換為json字串
2、json.loads():將json字串轉換為python字典
1、使用前面以 bs4 爬取到的貴州農產品資料
2、儲存到本地檔案中
...
def down_data(self):
    """
    Parse the product table from the fetched HTML and save each row.

    The markup is read from ``self.get_html``. The method locates the
    ``<table class="table table-hover">`` element and walks the ``<tr>``
    rows inside its ``<tbody>``. Cells 0, 1, 3 and 4 of every row are
    stored as ``name``, ``price``, ``address`` and ``time``.

    Every record is appended to ``food.json`` (mode ``'a'``, UTF-8) as an
    indented JSON object followed by a comma and a newline, so repeated
    runs keep growing the same file.

    NOTE(review): because each object is followed by a comma, the file is
    a comma-separated run of objects rather than one valid JSON document.

    :return: list of dicts with the keys ``name``, ``price``, ``address``
        and ``time``, in row order
    :raises IndexError: if a row has fewer than five ``<td>`` cells
    """
    soup = BeautifulSoup(self.get_html, 'lxml')
    table = soup.find('table', attrs={'class': 'table table-hover'})
    # NOTE(review): presumably raises AttributeError when the table or its
    # tbody is missing from the page - confirm against the target site.
    trs = table.find('tbody').find_all('tr')
    food_list = []
    # The context manager closes the file even if a row fails to parse.
    with open('food.json', 'a', encoding='utf-8') as fb:
        for tr in trs:
            tds = tr.find_all('td')
            food_dict = {
                'name': tds[0].get_text(),
                'price': tds[1].get_text(),
                'address': tds[3].get_text(),
                'time': tds[4].get_text(),
            }
            food_list.append(food_dict)
            # Write the record to the local file; ensure_ascii=False keeps
            # non-ASCII text readable instead of \uXXXX escapes.
            fb.write(json.dumps(food_dict, indent=2, ensure_ascii=False) + ',\n')
    return food_list
1、導包
import codecs
2、儲存資料的程式碼
def down_data(self):
    """
    Parse the product table from the fetched HTML and save each row.

    The markup is read from ``self.get_html``. The method locates the
    ``<table class="table table-hover">`` element and walks the ``<tr>``
    rows inside its ``<tbody>``. Cells 0, 1, 3 and 4 of every row are
    stored as ``name``, ``price``, ``address`` and ``time``.

    The output file ``food.json`` is opened through ``codecs.open`` in
    mode ``'wb'`` with UTF-8 encoding, so any existing content is
    replaced. Every record is written as an indented JSON object followed
    by a comma and a newline.

    NOTE(review): because each object is followed by a comma, the file is
    a comma-separated run of objects rather than one valid JSON document.

    :return: list of dicts with the keys ``name``, ``price``, ``address``
        and ``time``, in row order
    :raises IndexError: if a row has fewer than five ``<td>`` cells
    """
    soup = BeautifulSoup(self.get_html, 'lxml')
    table = soup.find('table', attrs={'class': 'table table-hover'})
    # NOTE(review): presumably raises AttributeError when the table or its
    # tbody is missing from the page - confirm against the target site.
    trs = table.find('tbody').find_all('tr')
    food_list = []
    # Open via codecs; the context manager closes the stream even if a
    # row fails to parse.
    with codecs.open('food.json', 'wb', encoding='utf-8') as fb:
        for tr in trs:
            tds = tr.find_all('td')
            food_dict = {
                'name': tds[0].get_text(),
                'price': tds[1].get_text(),
                'address': tds[3].get_text(),
                'time': tds[4].get_text(),
            }
            food_list.append(food_dict)
            # ensure_ascii=False keeps non-ASCII text readable instead of
            # \uXXXX escapes; the codecs stream encodes it as UTF-8.
            fb.write(json.dumps(food_dict, indent=2, ensure_ascii=False) + ',\n')
    return food_list