Python爬蟲:selenium掛shadowsocks代理爬取網頁內容
阿新 • 發佈:2018-12-29
selenium掛ss代理爬取網頁內容
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
# Fetch a page through a local shadowsocks SOCKS5 proxy with headless Chrome
# and print the fully rendered HTML.
url = 'https://www.google.com/'

options = Options()
# Headless mode (no browser UI). `options.headless = True` is deprecated;
# the `--headless` switch is the supported spelling in Selenium 4 / modern Chrome.
options.add_argument('--headless')
# Path to the Chrome binary — adjust to the local install location.
options.binary_location = "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe"
# Route all traffic through the local shadowsocks SOCKS5 listener.
options.add_argument('--proxy-server=socks5://127.0.0.1:1080')

# Selenium 4 removed the `executable_path=` and `chrome_options=` keyword
# arguments; the driver path now goes through a Service object and the
# options object is passed as `options=`.
driver = webdriver.Chrome(service=Service('chromedriver.exe'), options=options)
try:
    driver.get(url)            # blocks until the page has fully loaded
    html = driver.page_source  # rendered HTML after JS execution
finally:
    # Always tear down the browser — without this, a failed get() (proxy
    # down, timeout, ...) leaks an orphaned headless Chrome process.
    driver.quit()

print(html)