1. 程式人生 > Selenium新增Cookie模擬無驗證碼登入

Selenium新增Cookie模擬無驗證碼登入

最近寫爬蟲時遇到需要用 Selenium 模擬登入的情況,而且登入頁有滑塊驗證碼加上圖片點選驗證。在解決過程中發現一篇很好的示例,雖然針對的是無驗證碼的登入,但仍然很有參考價值,先記下來!

程式碼及註釋如下:

#!coding=utf-8
import time
from selenium import webdriver
import pickle


class BaiduSpider(object):
    """Drive Chrome against Baidu and reuse pickled cookies between runs.

    On construction the spider opens the Baidu history page, replays any
    cookies stored in ``COOKIE_FILE``, and then checks whether the page
    source contains the account name. If it does not, the user gets 60
    seconds to log in by hand and the resulting cookies are pickled for the
    next run.
    """

    # Path of the pickled cookie list. Override on a subclass or an instance
    # to store the cookies somewhere else.
    COOKIE_FILE = "cookies.pkl"

    def __init__(self, username, password):
        """Start Chrome, restore saved cookies and run the login check.

        Args:
            username: Account name; its presence in the page source is the
                signal used to decide that the session is logged in.
            password: Account password. It is stored on the instance but no
                method of this class reads it (the login itself is manual).
        """
        self.username = username
        self.password = password
        self.driver = webdriver.Chrome()
        # NOTE(review): the page is loaded before set_cookie(), presumably
        # because WebDriver only accepts cookies for the domain of the
        # currently loaded document -- confirm against the Selenium docs.
        self.driver.get(url='http://i.baidu.com/my/history')
        self.set_cookie()
        self.is_login()

    def is_login(self):
        """Refresh the page and branch on whether the user name appears in it.

        When the name is missing, ``login()`` is called to wait for a manual
        login; otherwise the Tieba index page is opened and kept on screen
        for 30 seconds.
        """
        self.driver.refresh()
        html = self.driver.page_source
        if self.username not in html:  # the user name is the logged-in marker
            print('還未登入,請手動登入!')
            self.login()
        else:
            print('已經登入!')
            self.driver.get(url='https://tieba.baidu.com/index.html?traceid=')
            time.sleep(30)  # pause so the result can be seen in the browser

    def login(self):
        """Wait 60 seconds for a manual login, then persist the cookies."""
        time.sleep(60)  # time window for the user to log in by hand
        self.driver.refresh()
        self.save_cookie()

    def save_cookie(self):
        """Pickle the browser's current cookies into ``COOKIE_FILE``."""
        # Fetch the cookies first so a driver failure cannot leave behind a
        # truncated cookie file.
        cookies = self.driver.get_cookies()
        # The context manager guarantees the file is flushed and closed.
        with open(self.COOKIE_FILE, "wb") as cookie_file:
            pickle.dump(cookies, cookie_file)

    def set_cookie(self):
        """Add the cookies pickled in ``COOKIE_FILE`` to the browser.

        Only the name and value of each saved cookie are reused; every other
        field is set to a fixed value. This is best-effort: any error (for
        example a missing cookie file on the first run) is printed and
        otherwise ignored.
        """
        try:
            # NOTE(review): pickle can execute arbitrary code while loading;
            # only load a cookie file that this script wrote itself.
            with open(self.COOKIE_FILE, "rb") as cookie_file:
                cookies = pickle.load(cookie_file)
            for cookie in cookies:
                # NOTE(review): "expires", "HostOnly" and "Secure" do not
                # look like WebDriver cookie field names ("expiry",
                # "secure"); presumably the driver ignores them -- confirm
                # before relying on these values.
                cookie_dict = {
                    # Original author's note: Firefox does not need the
                    # domain filled in, Chrome does.
                    "domain": ".baidu.com",
                    'name': cookie.get('name'),
                    'value': cookie.get('value'),
                    "expires": "",
                    'path': '/',
                    'httpOnly': False,
                    'HostOnly': False,
                    'Secure': False}
                self.driver.add_cookie(cookie_dict)
        except Exception as e:
            # Deliberately broad: restoring cookies must never stop the
            # spider from starting.
            print(e)


if __name__ == "__main__":
    # Replace the placeholders with your Baidu account name and password.
    BaiduSpider(username='xxxx', password='xxxx')