1. 程式人生 > selenium模擬 + 滑鼠滾動爬取魔方公寓租房評論資訊

selenium模擬 + 滑鼠滾動爬取魔方公寓租房評論資訊

因為魔方公寓的評論資訊放在一個單獨的 div 中，需要模擬滑鼠滾動才能拿到評論資訊；而且每次只能取得目前顯示在頁面上的內容，尚未顯示的部分取出來會是空字串。因此只能在每次滾動後立即取值並儲存。

程式碼:

import time
import win32api, win32con
from win32api import GetSystemMetrics
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from win32con import SM_CXSCREEN, SM_CYSCREEN

# Target listing page whose comments are scraped.
url = "https://www.52mf.com.cn/shopDetails/8069"

# NOTE(review): `headers` is not referenced anywhere else in the visible
# script, and it is not handed to the driver below — confirm whether it can
# be dropped or was meant to be applied to the browser.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    "Referer":"https://www.52mf.com.cn/"
}

# Start Chrome with default options. Headless mode is left off: the script
# later scrolls with OS-level wheel events (win32api.mouse_event), which
# need a real window on screen.
options = Options()
driver = webdriver.Chrome(
    executable_path=r"D:\tools\chromedriver.exe",
    options=options,
)
driver.get(url)
# Scroll the page down in three 200 px steps, pausing briefly after each,
# presumably so the comment section is in view before it is clicked.
for _ in range(3):
    driver.execute_script("window.scrollBy(0,200)")
    time.sleep(0.2)

# Click the "all comments" control under #anchor6, then give the page one
# second to react (the comments are later read from under #modal-root).
driver.find_element_by_css_selector(
    "#anchor6 > div.all-comment.cursor-pointer"
).click()
time.sleep(1)
# Park the real OS mouse cursor in the middle of the screen so that the wheel
# events injected later through win32api.mouse_event land on the browser
# window.
#
# The OS cursor is moved directly instead of using Selenium's ActionChains:
# an ActionChains call only queues the move until .perform() is invoked, and
# even then it drives the browser's virtual pointer, not the system cursor
# that MOUSEEVENTF_WHEEL events are delivered to.
#
# NOTE(review): assumes the comment list sits under the centre of the
# maximised window — confirm against the page layout.
x = GetSystemMetrics(SM_CXSCREEN)
y = GetSystemMetrics(SM_CYSCREEN)
driver.maximize_window()
# Integer division: SetCursorPos expects whole-pixel screen coordinates.
win32api.SetCursorPos((x // 2, y // 2))
# Scroll the comment list with OS-level wheel events and append each newly
# seen comment to ping.txt.
#
# Only comments currently rendered on screen expose their text (the others
# come back empty), so the list has to be re-read after every scroll step.
# The same element is therefore visible on several consecutive passes; the
# `seen` set keeps each (element, text) pair from being written more than
# once. Keying on the element id as well as the text keeps two different
# comments with identical wording.
seen = set()
try:
    # Open the output file once for the whole run instead of once per comment.
    with open("ping.txt", "a", encoding="utf8") as fp:
        # Number of wheel-scroll steps to perform.
        for i in range(260):
            # Negative wheel delta scrolls down.
            win32api.mouse_event(win32con.MOUSEEVENTF_WHEEL, 0, 0, -448)
            time.sleep(0.2)
            ping_list = driver.find_elements_by_xpath('//div[@id="modal-root"]//ul/li/p')
            time.sleep(0.2)
            for ping in ping_list:
                ping_str = ping.text.strip()
                # Elements not shown on the page yield empty text; skip them.
                if not ping_str:
                    continue
                key = (ping.id, ping_str)
                if key in seen:
                    continue
                seen.add(key)
                fp.write(ping_str + "\n")
            # Push this pass to disk so progress survives an abrupt stop.
            fp.flush()
            time.sleep(0.1)
finally:
    # quit() ends the whole WebDriver session (browser and chromedriver),
    # and runs even if the loop above raises.
    driver.quit()