Python爬取知乎專欄文章標題及URL
阿新 • • 發佈:2018-12-22
# -*- coding:utf-8 -*-
"""Print the URL and title of every article listed on a Zhihu column page.

Running this script requires the ``selenium`` package and a Chrome browser
driver (chromedriver) that Selenium can launch.
"""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Address of the column to scrape.
COLUMN_URL = 'https://zhuanlan.zhihu.com/buzhisuowei'

# How many times to scroll down before reading the page. Increase this when
# the column has many articles, so that all of them get loaded.
SCROLL_ROUNDS = 10

# Seconds to wait after each scroll, giving the page time to load more items.
SCROLL_PAUSE_SECONDS = 2

# XPath of the link inside the n-th article list item (1-based index).
# NOTE(review): this is tied to the page layout at the time of writing and
# will need updating if the page structure changes.
ARTICLE_LINK_XPATH = (
    '//*[@id="react-root"]/div/div/div[3]/div[2]/div[2]/ul/li[{}]/div/div/a'
)

driver = webdriver.Chrome()
try:
    driver.get(COLUMN_URL)

    # Scroll towards the bottom of the page repeatedly so the page loads
    # the full article list.
    for _ in range(SCROLL_ROUNDS):
        driver.execute_script("window.scrollBy(0,5000)")
        time.sleep(SCROLL_PAUSE_SECONDS)

    # Locate every article title by its class name.
    titles = driver.find_elements(By.CLASS_NAME, 'PostListItem-title')
    print(len(titles))

    # Iterate over the titles that were actually found instead of a
    # hard-coded article count, so the script neither crashes on shorter
    # columns nor silently truncates longer ones.
    # Assumes the n-th title belongs to the n-th list item of the XPath.
    for position, title in enumerate(titles, start=1):
        link = driver.find_element(
            By.XPATH, ARTICLE_LINK_XPATH.format(position)
        )
        # Print the article URL immediately followed by its title.
        print(link.get_property('href') + title.text)
finally:
    # Always close the browser and stop the driver process, even on errors.
    driver.quit()