1. 程式人生 > >利用appium和Android模擬器爬取微信朋友圈(解決每次重啟登入)

利用appium和Android模擬器爬取微信朋友圈(解決每次重啟登入)

特別注意：微信具有一定的反爬能力。測試時發現，每次執行爬取任務時，特定節點的 ID 和 XPath 都會發生變化；保險起見，每次重新連線手機後，都要更新程式碼中對應節點的 ID 和 XPath。

同時設定引數 'noReset': True，使會話啟動和結束時都不清空應用資料；若不設定此引數，用例執行完後會預設重置 APP，也就是刪除 APP 的所有資料。

避免多次輸入使用者名稱和密碼登入,防止被封

首次執行時需要呼叫 login 函式完成登入；之後再次執行時登入狀態已保留，請將 main 中對 login 的呼叫註釋掉。

import os
from appium import webdriver
from appium.webdriver.common.touch_action import TouchAction
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pymongo import MongoClient
from time import sleep
from processor import Processor
from config import *


class Moments:
    """Crawl WeChat Moments through Appium and upsert each post into MongoDB.

    Connection settings, credentials, element wait time and swipe geometry
    all come from the constants star-imported from ``config``.
    """

    def __init__(self):
        """Open the Appium session and the MongoDB collection."""
        # Driver configuration
        self.desired_caps = {
            'platformName': PLATFORM,
            'deviceName': DEVICE_NAME,
            'appPackage': APP_PACKAGE,
            'appActivity': APP_ACTIVITY,
            # Keep app data between sessions so the WeChat login is not
            # wiped and does not have to be repeated on every run.
            'noReset': True,
        }
        self.driver = webdriver.Remote(DRIVER_SERVER, self.desired_caps)
        self.wait = WebDriverWait(self.driver, TIMEOUT)
        self.client = MongoClient(MONGO_URL)
        self.db = self.client[MONGO_DB]
        self.collection = self.db[MONGO_COLLECTION]
        # Converts the relative timestamps shown in the feed into dates
        self.processor = Processor()

    def login(self):
        """Log in to WeChat with USERNAME / PASSWORD from the config.

        The resource ids below are specific to one WeChat build; they have to
        be refreshed whenever the app changes them.

        :return: None
        """
        # Login button
        login_button = self.wait.until(
            EC.presence_of_element_located((By.ID, 'com.tencent.mm:id/cjk')))
        login_button.click()
        # Phone number input
        phone_input = self.wait.until(
            EC.presence_of_element_located((By.ID, 'com.tencent.mm:id/h2')))
        phone_input.set_text(USERNAME)
        # "Next" button
        next_button = self.wait.until(
            EC.element_to_be_clickable((By.ID, 'com.tencent.mm:id/adj')))
        next_button.click()
        # Password input
        password_input = self.wait.until(
            EC.presence_of_element_located((By.XPATH, '//*[@resource-id="com.tencent.mm:id/h2"][1]')))
        password_input.set_text(PASSWORD)
        # Submit button
        submit_button = self.wait.until(
            EC.element_to_be_clickable((By.ID, 'com.tencent.mm:id/adj')))
        submit_button.click()

    def enter(self):
        """Open the Moments feed from the WeChat main screen.

        :return: None
        """
        # Bottom tab that leads to the Moments entry
        tab = self.wait.until(
            EC.presence_of_element_located((By.XPATH, '//*[@resource-id="com.tencent.mm:id/bw3"][3]')))
        tab.click()
        # Moments entry
        moments = self.wait.until(
            EC.presence_of_element_located((By.ID, 'com.tencent.mm:id/atz')))
        moments.click()

    def crawl(self):
        """Scroll the feed forever, upserting every visible post into MongoDB.

        The loop has no exit condition: it runs until an exception escapes
        (for example when the wait for feed items times out) or the process
        is interrupted.

        :return: never returns normally
        """
        while True:
            # All posts currently present on the page
            items = self.wait.until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, '//*[@resource-id="com.tencent.mm:id/cve"]//android.widget.FrameLayout')))
            # Swipe upwards to bring the next posts into view.
            # NOTE(review): the items above were located before this swipe;
            # confirm they are still valid when read below.
            self.driver.swipe(FLICK_START_X, FLICK_START_Y + FLICK_DISTANCE, FLICK_START_X, FLICK_START_Y)
            # Walk through every post
            for item in items:
                try:
                    # find_element(By.ID, ...) replaces find_element_by_id,
                    # which newer Selenium releases no longer provide.
                    # Nickname
                    nickname = item.find_element(By.ID, 'com.tencent.mm:id/aig').get_attribute('text')
                    # Body text
                    content = item.find_element(By.ID, 'com.tencent.mm:id/cwm').get_attribute('text')
                    # Date as displayed in the feed
                    date = item.find_element(By.ID, 'com.tencent.mm:id/crh').get_attribute('text')
                except NoSuchElementException:
                    # Not every FrameLayout is a complete post; skip those
                    continue
                # Normalize the displayed date
                date = self.processor.date(date)
                print(nickname, content, date)
                data = {
                    'nickname': nickname,
                    'content': content,
                    'date': date,
                }
                # Upsert keyed on (nickname, content) so re-seen posts are not
                # duplicated. update_one replaces Collection.update, which
                # newer PyMongo releases no longer provide.
                self.collection.update_one(
                    {'nickname': nickname, 'content': content}, {'$set': data}, upsert=True)
                sleep(SCROLL_SLEEP_TIME)

    def main(self, need_login=True):
        """Run the whole workflow: optional login, open Moments, crawl.

        :param need_login: perform the login step first. Pass ``False`` on
            later runs, when the session kept by ``noReset`` is still logged
            in, to avoid re-entering the credentials.
        :return: None
        """
        # Login is only required on the first run
        if need_login:
            self.login()
        # Open Moments
        self.enter()
        # Crawl
        self.crawl()


if __name__ == '__main__':
    # Script entry point: constructing Moments opens the Appium session and
    # the MongoDB connection, then main() runs the full workflow.
    Moments().main()

配置程式碼config.py

import os

# --- Appium session -------------------------------------------------------

# Target platform
PLATFORM = 'Android'

# Device name, as listed by `adb devices -l`
DEVICE_NAME = 'MI_NOTE_Pro'

# Path of the WeChat APK, relative to the current working directory
APP = os.path.abspath('.') + '/weixin.apk'

# Package name of the app under automation
APP_PACKAGE = 'com.tencent.mm'

# Launcher activity of the app
APP_ACTIVITY = '.ui.LauncherUI'

# Address of the Appium server
DRIVER_SERVER = 'http://localhost:4723/wd/hub'

# How long to wait for an element to load
TIMEOUT = 5 * 60

# --- WeChat account -------------------------------------------------------

# Phone number and password used to log in
USERNAME = ''
PASSWORD = ''

# --- Swipe gesture --------------------------------------------------------

# Reference point of the swipe and the distance it covers
FLICK_START_X = 300
FLICK_START_Y = 300
FLICK_DISTANCE = 700

# Pause between scrolls
SCROLL_SLEEP_TIME = 1

# --- MongoDB --------------------------------------------------------------

MONGO_URL = 'localhost'
MONGO_DB = 'moments'
MONGO_COLLECTION = 'moments'

時間處理程式碼 processor.py

import time
import re


class Processor:
    """Convert the relative timestamps shown in the feed to ``YYYY-MM-DD``."""

    _DATE_FORMAT = '%Y-%m-%d'
    _SECONDS_PER_DAY = 24 * 60 * 60

    # (pattern, seconds per unit) for "<N> minutes/hours/days ago".
    # Both the Traditional (鐘/時) and Simplified (钟/时) spellings are
    # accepted — assumption: the app may display either depending on its
    # language setting; verify against the real UI text.
    _RELATIVE_PATTERNS = (
        (re.compile(r'(\d+)分[鐘钟]前'), 60),
        (re.compile(r'(\d+)小[時时]前'), 60 * 60),
        (re.compile(r'(\d+)天前'), _SECONDS_PER_DAY),
    )
    # "Yesterday"
    _YESTERDAY_PATTERN = re.compile(r'昨天')

    def date(self, datetime):
        """
        Normalize a displayed timestamp.

        Strings that start with "<N> minutes ago", "<N> hours ago",
        "yesterday" or "<N> days ago" (in Chinese) are converted to the
        corresponding local date. Anything else, including empty or ``None``
        input, is returned unchanged.

        :param datetime: raw timestamp text
        :return: ``YYYY-MM-DD`` string, or the input when it is not a
            recognised relative timestamp
        """
        if not datetime:
            # Nothing to parse; pass through instead of failing in re.match
            return datetime

        now = time.time()

        if self._YESTERDAY_PATTERN.match(datetime):
            return self._format(now - self._SECONDS_PER_DAY)

        for pattern, unit_seconds in self._RELATIVE_PATTERNS:
            matched = pattern.match(datetime)
            if matched:
                # The offset must be subtracted from the epoch seconds
                # *before* converting to a struct_time.
                return self._format(now - float(matched.group(1)) * unit_seconds)

        return datetime

    def _format(self, timestamp):
        """Format epoch seconds as a local ``YYYY-MM-DD`` date string."""
        return time.strftime(self._DATE_FORMAT, time.localtime(timestamp))