1. 程式人生 > 利用 selenium 抓取淘寶信息

利用 selenium 抓取 淘寶信息

tle clas date screens pin sleep source log pre

import lxml
from bs4 import BeautifulSoup
import time
from selenium import webdriver
import re
# Taobao item page that this script scrapes.
ITEM_URL = "https://item.taobao.com/item.htm?spm=2013.1.0.0.bLyAul&id=17676925595"

# Pause after navigation before reading the DOM (presumably so the page's
# JavaScript can finish rendering -- confirm 10 s is still enough).
RENDER_WAIT_SECONDS = 10

# The page labels are accepted in both Simplified and Traditional spelling.
# NOTE(review): the live page is presumably Simplified Chinese; the Traditional
# forms are kept so the previously written patterns still match.
SOLD_COUNT_RE = re.compile(r'title="30天[内內]已售出(\d.*?)件')
COLOR_RE = re.compile(
    r'<li title="([^"]*)">[颜顏]色分[类類]|li title="([^"]*)">主要[颜顏]色'
)
# Protocol-relative alicdn URL with the "_50x50.jpg" thumbnail suffix removed.
THUMBNAIL_RE = re.compile(r'(//[^"\s]*?alicdn[^"\s]*?)_50x50\.jpg')
# Same character range the screenshot name was always built from.
CJK_CHAR_RE = re.compile("[\u4e00-\u9fa5]")


def parse_sold_counts(markup):
    """Return every "sold in the last 30 days" figure found in *markup*.

    *markup* is the string form of the ``.tb-sell-counter`` elements.
    The result is a list of strings (empty when nothing matches).
    """
    return SOLD_COUNT_RE.findall(markup)


def parse_colors(markup):
    """Return the colour attributes found in *markup*.

    *markup* is the string form of the ``.attributes-list`` elements.
    Each hit is a 2-tuple ``(colour_category, main_colour)``; the slot of
    the alternative that did not match is the empty string.
    """
    return COLOR_RE.findall(markup)


def parse_main_image_url(markup):
    """Return the full-size image URL behind the first thumbnail, or None.

    *markup* is the string form of the ``#J_UlThumb`` ``<img>`` elements.
    The captured URL is protocol-relative (it starts with ``//``), so only
    the scheme ``http:`` is prepended; prepending ``http://`` would yield
    ``http:////host/...``.
    """
    hit = THUMBNAIL_RE.search(markup)
    if hit is None:
        return None
    return "http:" + hit.group(1)


def screenshot_filename(page_title):
    """Build the screenshot file name from the CJK characters of the title.

    Falls back to ``screenshot.jpg`` when the title contains no such
    character, so the file is never named just ``.jpg``.
    """
    stem = "".join(CJK_CHAR_RE.findall(page_title)) or "screenshot"
    # NOTE(review): Selenium writes PNG data whatever the extension is; the
    # ".jpg" suffix is kept so existing output paths do not change.
    return stem + ".jpg"


def collect_item_data(soup):
    """Extract ``[image_url, sold_counts, colours, date, category_id]``.

    *soup* is the parsed item page. Fields whose element is missing are
    reported as ``None`` (or an empty list) instead of raising.
    """
    mig = parse_main_image_url(
        str(soup.select("#J_UlThumb > li > div > a > img"))
    )
    shu_liang = parse_sold_counts(str(soup.select(".tb-sell-counter")))
    color = parse_colors(str(soup.select(".attributes-list")))
    daytime = time.strftime("%Y/%m/%d")  # local date of the scrape
    category_nodes = soup.select(".tb-pine")
    leimu = category_nodes[0].get("data-catid") if category_nodes else None
    return [mig, shu_liang, color, daytime, leimu]


def main():
    """Load the item page, print the scraped record and save a screenshot."""
    # NOTE(review): PhantomJS support was deprecated in late Selenium 3
    # releases and removed in Selenium 4 -- confirm the installed version or
    # switch to a headless browser driver.
    driver = webdriver.PhantomJS()
    try:
        # Very tall window so the whole page is laid out and captured.
        driver.set_window_size(1600, 20000)
        driver.get(ITEM_URL)
        time.sleep(RENDER_WAIT_SECONDS)
        soup = BeautifulSoup(driver.page_source, "lxml")
        dates = collect_item_data(soup)
        driver.save_screenshot(screenshot_filename(driver.title))
        print(dates)
    finally:
        # Always stop the browser process, even when scraping fails.
        driver.quit()


if __name__ == "__main__":
    main()

利用 selenium 抓取 淘寶信息