1. 程式人生 > >Python爬蟲:無頭瀏覽器爬蟲

Python爬蟲:無頭瀏覽器爬蟲

Ubuntu

使用chromium

sudo apt-get install -y chromium-browser # 安裝瀏覽器,這步必須;如果只手動安裝瀏覽器,執行時會因缺少依賴而報錯。

或者看這個安裝新版瀏覽器並用binary_location指定位置(需要科學上網):
https://github.com/scheib/chromium-latest-linux
也可以不科學上網手動下載:
https://www.chromium.org/getting-involved/download-chromium

CentOS

使用firefox

yum -y install firefox

驅動:

下載對應的驅動後,為其加上執行許可權(chmod +x):
chrome:http://chromedriver.chromium.org/
firefox:https://github.com/mozilla/geckodriver/releases

使用瀏覽器的無頭模式headless

安裝模組:

pip3 install selenium beautifulsoup4 lxml # ChromeDriver

chrome

#!/usr/bin/env python
#coding=utf-8
"""Fetch a page with headless Chrome/Chromium and print its rendered HTML."""
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

url = "http://www.qq.com"
options = Options()
options.headless = True

# Windows example: point at the browser binary and the driver explicitly.
#options.binary_location = "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe"
#driver = webdriver.Chrome(executable_path='chromedriver.exe', options=options)

# Linux example: uncomment to use a manually downloaded Chromium build.
#options.binary_location = "/home/ubuntu/chrome-linux/chrome"
# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(executable_path='/home/ubuntu/chromedriver', options=options)
try:
    driver.get(url)
    html = driver.page_source
    print(html)
finally:
    # Always shut the browser down, even if loading the page failed,
    # so no headless browser process is left running.
    driver.quit()

firefox

#!/usr/bin/env python
#coding=utf-8
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
import time

# Fetch a page with headless Firefox and print its rendered HTML.
url = 'http://www.qq.com/'
options = Options()
options.headless = True

# No custom profile by default. This name must exist because it is passed
# to webdriver.Firefox() below; None means "no custom profile".
profile = None

# To route traffic through a proxy, uncomment the following lines.
#profile = webdriver.FirefoxProfile()
#profile.set_preference('network.proxy.type', 1)
#profile.set_preference('network.proxy.http', "127.0.0.1")
#profile.set_preference('network.proxy.http_port', 1080)
#profile.set_preference('network.proxy.socks', "127.0.0.1")
#profile.set_preference('network.proxy.socks_port', 1080)
#profile.set_preference('network.proxy.ssl', "127.0.0.1")
#profile.set_preference('network.proxy.ssl_port', 1080)
#profile.set_preference('network.proxy.ftp', "127.0.0.1")
#profile.set_preference('network.proxy.ftp_port', 1080)
#
#profile.set_preference("network.proxy.share_proxy_settings", True)
#profile.update_preferences()

# Windows example: point at the browser binary and the driver explicitly.
#options.binary_location = "D:/Program Files/Mozilla Firefox/firefox.exe"
#driver = webdriver.Firefox(executable_path='geckodriver.exe', firefox_profile=profile, options=options)

# Linux example: uncomment to use a manually downloaded Firefox build.
#options.binary_location = "/root/firefox-linux/bin/firefox"
# `options=` replaces the deprecated `firefox_options=` keyword.
driver = webdriver.Firefox(executable_path='/root/geckodriver', firefox_profile=profile, options=options)
try:
    driver.get(url)
    html = driver.page_source
    print(html)
finally:
    # Always shut the browser down, even if loading the page failed,
    # so no headless browser process is left running.
    driver.quit()