爬取Aliexpress網站的商品資料,儲存至excel表格
阿新 • 發佈:2019-01-03
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time   : 2017/4/19 10:43
# @Author : WuFan
"""Scrape product listings (title, price, order count, store name) from
AliExpress search results and save them into an .xls spreadsheet whose
filename is stamped with today's date."""

import datetime
import re

import bs4
import requests
import xlwt

SEARCH_URL = 'https://www.aliexpress.com/wholesale'  # search endpoint
PAGE_COUNT = 5  # number of result pages to scrape


def scrape(keyword, pages=PAGE_COUNT):
    """Fetch *pages* result pages for *keyword*.

    Returns four parallel lists: titles, prices, order counts, store names.
    """
    payload = {'SearchText': keyword, 'page': '1', 'ie': 'utf8', 'g': 'y'}
    titles, prices, orders, stores = [], [], [], []
    for page in range(1, pages + 1):
        payload['page'] = page  # page number is a URL query parameter
        resp = requests.get(SEARCH_URL, params=payload)
        # BUG FIX: set the encoding BEFORE reading resp.text — the original
        # assigned it after parsing, which had no effect on decoding.
        resp.encoding = 'utf-8'
        print(resp.url)  # log the URL actually requested
        soup = bs4.BeautifulSoup(resp.text, "html.parser")

        # Product titles. find_all already returns Tag objects, so read the
        # attribute directly — no need to re-parse str(tag) as the original did.
        for a in soup.find_all('a', class_=re.compile("history-item product")):
            titles.append(a['title'])
        # Prices
        for span in soup.find_all('span', itemprop="price"):
            prices.append(span.string)
        # Order counts: the number lives in an <em> inside the matched <a>
        for a in soup.find_all('a', class_=re.compile("order-num-a")):
            orders.append(a.em.string)
        # Store names
        for div in soup.find_all('div', class_="store-name util-clearfix"):
            stores.append(div.a.string)
    return titles, prices, orders, stores


def save_to_excel(filename, titles, prices, orders, stores):
    """Write the four scraped columns into *filename* as an .xls workbook."""
    workbook = xlwt.Workbook(encoding='utf-8')  # create the workbook
    sheet = workbook.add_sheet('demo')          # create the sheet
    for row, (t, p, o, s) in enumerate(zip(titles, prices, orders, stores)):
        sheet.write(row, 0, row + 1)  # column 0: 1-based sequence number
        sheet.write(row, 1, t)
        sheet.write(row, 2, p)
        sheet.write(row, 3, o)
        sheet.write(row, 4, s)
    workbook.save(filename)


def main():
    keyword = 'nike'
    # Timestamp in the filename makes repeated runs easy to tell apart.
    date = datetime.datetime.now().strftime('%Y-%m-%d')
    titles, prices, orders, stores = scrape(keyword)

    # Sanity check: the four columns must line up one-to-one.
    print(len(titles))
    print(len(prices))
    print(len(orders))
    print(len(stores))
    if len(titles) == len(prices) == len(orders) == len(stores):
        print("資料完整,生成 %d 組商品資料!" % len(titles))
        print("正在寫入excel表格...")
        save_to_excel("%s-%s.xls" % (keyword, date),
                      titles, prices, orders, stores)
        print("寫入excel表格成功!")
    else:
        # Original silently did nothing on a mismatch; report it instead.
        print("column lengths differ; skipping excel export")


if __name__ == "__main__":
    main()