selenium模擬 + 滑鼠滾動爬取魔方公寓租房評論資訊
阿新 • • 發佈:2018-12-25
因為魔方公寓的評論資訊放在一個單獨的 div 中，必須模擬滑鼠滾動才能取得；而且每次只能取到頁面上當前顯示的內容，尚未顯示的元素取出來的文字是空的。因此只能在每次滾動之後取值並儲存。
程式碼:
"""Scrape the tenant reviews of one 52mf.com.cn apartment listing.

The reviews live in a separate scrollable modal, and an item only yields
text while it is actually displayed.  The script therefore opens the
modal, turns the real mouse wheel step by step via the Win32 API, and
after every step appends the newly visible review texts to ``ping.txt``.

Windows only: requires pywin32, Selenium (3.x API) and a chromedriver
binary at ``CHROMEDRIVER_PATH``.
"""
import time

import win32api
import win32con
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from win32api import GetSystemMetrics
from win32con import SM_CXSCREEN, SM_CYSCREEN

CHROMEDRIVER_PATH = r"D:\tools\chromedriver.exe"
URL = "https://www.52mf.com.cn/shopDetails/8069"
OUTPUT_PATH = "ping.txt"

ALL_COMMENTS_SELECTOR = "#anchor6 > div.all-comment.cursor-pointer"
COMMENT_XPATH = '//div[@id="modal-root"]//ul/li/p'

SCROLL_COUNT = 260   # number of wheel steps sent to the review modal
WHEEL_STEP = -448    # negative wheel data scrolls down (towards the user)


def main():
    """Open the listing, expand the review modal, scroll it and save reviews."""
    options = Options()
    # Headless mode is not an option here: the wheel events below are
    # injected at OS level and need a real, visible browser window.
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=options)
    try:
        driver.get(url=URL)

        # Scroll the page a little before clicking; presumably this brings
        # the "all comments" link into view.
        for _ in range(3):
            driver.execute_script("window.scrollBy(0,200)")
            time.sleep(0.2)

        driver.find_element_by_css_selector(ALL_COMMENTS_SELECTOR).click()
        time.sleep(1)

        driver.maximize_window()

        # mouse_event() acts at the real cursor position, so the OS cursor
        # itself has to be moved (an ActionChains move only affects
        # WebDriver's virtual pointer).  Assumes the review modal covers
        # the centre of the screen once the window is maximized.
        screen_width = GetSystemMetrics(SM_CXSCREEN)
        screen_height = GetSystemMetrics(SM_CYSCREEN)
        win32api.SetCursorPos((screen_width // 2, screen_height // 2))

        # Consecutive scroll steps expose overlapping sets of reviews;
        # remember what was already written so each one is saved once.
        # The element id is part of the key so that two different reviews
        # with identical wording are both kept.
        written = set()
        with open(OUTPUT_PATH, "a", encoding="utf8") as fp:
            for _ in range(SCROLL_COUNT):
                win32api.mouse_event(win32con.MOUSEEVENTF_WHEEL, 0, 0, WHEEL_STEP)
                time.sleep(0.2)
                paragraphs = driver.find_elements_by_xpath(COMMENT_XPATH)
                time.sleep(0.2)
                for paragraph in paragraphs:
                    text = paragraph.text.strip()
                    # Items that are not displayed yet come back empty.
                    if not text:
                        continue
                    key = (paragraph.id, text)
                    if key in written:
                        continue
                    written.add(key)
                    fp.write(text + "\n")
    finally:
        # quit() also stops the chromedriver process, even after an error.
        driver.quit()


if __name__ == "__main__":
    main()