【python】模擬使用者登入爬取資料帶cookie情況處理
阿新 • • 發佈:2019-02-05
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Simulate a user login and crawl a page that requires the session cookie.

The script POSTs the login form to ``LOGIN_URL`` through a cookie-aware
opener, saves the received cookies to ``cookie_filename``, prints them, and
then requests ``get_url`` with the same opener so the cookies are sent along.
"""
import http.cookiejar
import urllib.error
import urllib.parse
import urllib.request

# Login URL (placeholder -- replace with the real login endpoint).
LOGIN_URL = '登陸地址'

# URL of the page to crawl after logging in (placeholder).
get_url = 'xxx'

# Form fields sent with the login request.
# NOTE(review): 'authenticity_token' looks like a CSRF token copied from one
# rendering of the login form -- confirm the target site accepts a fixed value.
values = {
    'utf8': '✓',
    'authenticity_token': 'IIlZWXgqKz6U9bXPhwA9DXf2S8HT1/6uxrcOMCT4xNA=',
    'account[email]': '使用者名稱',
    'account[password]': "密碼",
    'commit': ' login',
}

# URL-encode the form fields and turn them into bytes for the POST body.
postdata = urllib.parse.urlencode(values).encode()

user_agent = (
    r'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    r'(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
)
headers = {'User-Agent': user_agent, 'Connection': 'keep-alive'}

# Cookie jar bound to a file, plus an opener that records and replays cookies.
cookie_filename = 'cookie.txt'
cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)


def describe_url_error(error):
    """Return a printable description of a ``URLError`` or ``HTTPError``.

    Args:
        error: The exception raised while opening a URL.

    Returns:
        ``'<code> : <reason>'`` when the error carries an HTTP status code,
        otherwise just the reason as text.
    """
    # Only HTTPError defines ``code``; a plain URLError has ``reason`` alone,
    # so reading ``error.code`` unconditionally would raise AttributeError.
    code = getattr(error, 'code', None)
    if code is None:
        return str(error.reason)
    return '{} : {}'.format(code, error.reason)


def read_page(target, data=None):
    """Open ``target`` through the shared opener and return the decoded body.

    Args:
        target: URL to request.
        data: Optional bytes for the request body; when given, the request
            is sent as a POST.

    Returns:
        The response body decoded to ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        ValueError: If ``target`` is not a URL with a recognised scheme.
    """
    request = urllib.request.Request(target, data, headers)
    # The ``with`` block closes the response even if reading/decoding fails.
    with opener.open(request) as response:
        # Honour the charset declared by the server; fall back to UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'
        return response.read().decode(charset)


def main():
    """Log in, persist and print the cookies, then fetch and print ``get_url``."""
    try:
        read_page(LOGIN_URL, postdata)
    except urllib.error.URLError as e:
        print(describe_url_error(e))

    # Save the cookies to cookie.txt, including session and expired ones.
    cookie.save(ignore_discard=True, ignore_expires=True)
    print(cookie)
    for item in cookie:
        print('Name = ' + item.name)
        # A cookie's value may be None, so format it instead of using '+'.
        print('Value = {}'.format(item.value))

    # Request the target page with the same opener so the cookies are sent.
    try:
        print(read_page(get_url))
    except urllib.error.URLError as e:
        print(describe_url_error(e))


if __name__ == '__main__':
    main()