1. 程式人生 > >第十章 採集javascript 使用selenium庫進行獲取

第十章 採集javascript 使用selenium庫進行獲取

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# 等3秒獲取指定的內容 會有selenium不支援無頭的phantomjs的警告
# from selenium import webdriver
# import time
# driver=webdriver.PhantomJS(executable_path='D:/pycharm/phantomjs-2.1.1-windows/bin/phantomjs')
# driver.get("http://pythonscraping.com/pages/javascript/ajaxDemo.html")
# time.sleep(3)
# print(driver.find_element_by_id('content').text)
# driver.close()


# 用id檢查頁面是不是已經完全載入
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support import expected_conditions as EC
# from selenium.webdriver.support.ui import WebDriverWait
#
# driver=webdriver.PhantomJS(executable_path='D:/pycharm/phantomjs-2.1.1-windows/bin/phantomjs')
# driver.get("http://pythonscraping.com/pages/javascript/ajaxDemo.html")
# try:
#     element=WebDriverWait(driver,10).until(
#         EC.presence_of_element_located((By.ID,"loadedButton")))
# finally:
#     print(driver.find_element_by_id("content").text)
#     driver.close()

# 處理重定向 客戶端重定向的處理 在頁面開始載入時監控dom元素,重複呼叫這個元素
# 直到selenium丟擲StaleElementReferenceException異常,說明網頁發生了跳轉

# 每半秒鐘檢查一次網頁,看是否html的標籤還在不在,時限為10s
import time

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException


def waitForLoad(driver, timeout=10, poll_interval=.5):
    """Block until the page held by ``driver`` is replaced, or a timeout expires.

    The ``<html>`` element is captured on entry and then re-fetched
    periodically. As soon as the freshly fetched element is no longer the
    captured one (or comparing them raises ``StaleElementReferenceException``),
    the page is considered to have changed and the function returns.

    Args:
        driver: A Selenium WebDriver that has already loaded the start page.
        timeout: Maximum number of seconds to keep polling (default 10).
        poll_interval: Seconds to sleep before each check (default 0.5).

    Returns:
        None. A message is printed if the timeout expires without a page
        change being detected.
    """
    original_html = driver.find_element_by_tag_name("html")
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        time.sleep(poll_interval)
        try:
            # Depending on the Selenium version, comparing against an element
            # from a replaced document either raises or just evaluates as
            # "not equal"; both outcomes mean the page has changed.
            if original_html != driver.find_element_by_tag_name("html"):
                return
        except StaleElementReferenceException:
            return
    print("Timing out after {} seconds and returning".format(timeout))
# Start PhantomJS, open the redirect demo page, wait for the page to change
# (or for the wait to time out), then print the HTML of whatever is loaded.
# NOTE: the driver class is ``webdriver.PhantomJS``; ``webdriver.phantomjs``
# is the sub-package and is not callable.
driver = webdriver.PhantomJS(executable_path='D:/pycharm/phantomjs-2.1.1-windows/bin/phantomjs')
try:
    driver.get("http://pythonscraping.com/pages/javascript/redirectDemo1.html")
    waitForLoad(driver)
    print(driver.page_source)
finally:
    # Always shut the browser process down, even if loading or waiting fails.
    driver.quit()