1. 程式人生 > 【python】模擬使用者登入爬取資料帶cookie情況處理

【python】模擬使用者登入爬取資料帶cookie情況處理

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib.request, urllib.parse, urllib.error
import http.cookiejar
# Login endpoint (the value here is a placeholder string).
LOGIN_URL = '登陸地址'

# Form fields sent with the login POST. The e-mail and password entries are
# placeholders; NOTE(review): the authenticity token is hard-coded and is
# presumably session-specific on a real site -- confirm before reuse.
values = {
    'utf8': '✓',
    'authenticity_token': 'IIlZWXgqKz6U9bXPhwA9DXf2S8HT1/6uxrcOMCT4xNA=',
    'account[email]': '使用者名稱',
    'account[password]': '密碼',
    'commit': ' login',
}

# URL-encode the form fields and turn the result into UTF-8 bytes.
postdata = bytes(urllib.parse.urlencode(values), 'utf-8')

# Browser-like request headers shared by every request in this script.
user_agent = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/44.0.2403.157 Safari/537.36'
)
headers = {
    'User-Agent': user_agent,
    'Connection': 'keep-alive',
}


# Build an opener whose requests all pass through a cookie jar, so cookies
# set by one response are sent with later requests made via `opener`.
# The jar is a MozillaCookieJar bound to `cookie_filename`; constructing it
# does not read or write the file.
cookie_filename = 'cookie.txt'
cookie = http.cookiejar.MozillaCookieJar(filename=cookie_filename)
handler = urllib.request.HTTPCookieProcessor(cookiejar=cookie)
opener = urllib.request.build_opener(handler)

# Submit the login form as a POST (a Request with a data payload is sent as
# POST). Going through `opener` lets its cookie processor capture any cookies
# the server sets in the response.
request = urllib.request.Request(LOGIN_URL, postdata, headers)
try:
    # The context manager closes the connection once the body has been read.
    with opener.open(request) as response:
        page = response.read().decode()
except urllib.error.HTTPError as e:
    # The server replied with an error status. HTTPError is a subclass of
    # URLError, so it must be caught first; only it carries `.code`.
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    # No HTTP response was received (e.g. DNS failure or refused connection),
    # so there is no status code to report -- accessing `e.code` here would
    # raise AttributeError.
    print('URLError', ':', e.reason)

# Write the jar to the file it was created with. The two flags make save()
# also keep cookies marked to be discarded and cookies that have expired.
cookie.save(ignore_discard=True, ignore_expires=True)
print(cookie)
for item in cookie:
    # Pass the fields as separate print() arguments instead of concatenating:
    # the output is the same for strings, and a cookie whose value is None
    # no longer raises TypeError.
    print('Name =', item.name)
    print('Value =', item.value)
    
# URL of the page to scrape (placeholder value). The original line was missing
# its opening quote, which made the whole file a syntax error.
get_url = 'xxx'
get_request = urllib.request.Request(get_url, headers=headers)
# Reuse `opener` so the request goes through the same cookie-aware opener;
# the context manager closes the connection after the body is printed.
with opener.open(get_request) as get_response:
    print(get_response.read().decode())