1. 程式人生 > 使用http.cookiejar帶cookie信息登錄爬取方法 -《狗嗨默示錄》-

使用http.cookiejar帶cookie信息登錄爬取方法 -《狗嗨默示錄》-

web -a utf 5.0 wow int ket ref href

Login.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import urllib.request
import urllib.parse
import user_info
import http.cookiejar
import re
import time
import socket


# Shared cookie-aware opener: the jar stores the cookies set by the login
# response, and every later request made through `opener` sends them back.
cookie = http.cookiejar.CookieJar()  # cookie jar holding the session cookies
handler = urllib.request.HTTPCookieProcessor(cookie)  # cookie-handling processor
opener = urllib.request.build_opener(handler)  # opener that carries the cookies

# Set the timeout before any request is made. The original only set it after
# the first download had already completed, leaving the login, the listing and
# the first download without any timeout.
socket.setdefaulttimeout(30)

post_url = 'http://www.ks5u.com/user/inc/UserLogin_Index.asp'

# NOTE(review): in the published listing this literal was replaced by the
# page's e-mail obfuscation, so the real value is not recoverable from here.
# Presumably it is the account name echoed back by the site after a successful
# login -- replace it with the real account identifier before relying on the
# check below.
LOGIN_MARKER = '[email protected]'


def login():
    """Post the login form and return the response body as text.

    The request goes through the shared ``opener`` so the cookies set by the
    server are kept in ``cookie`` for the following requests.

    Returns:
        The response body decoded as gb2312.
    """
    # NOTE(review): urllib requires POST data to be bytes; user_info.data is
    # presumably the already url-encoded login form -- confirm in user_info.
    req = urllib.request.Request(post_url, user_info.data)
    with opener.open(req) as resp:
        html = resp.read()
    return html.decode('gb2312')


def getlist():
    """Fetch the listing page and extract the document links.

    Returns:
        A list of ``(href, title)`` tuples, one per matching ``<a>`` tag.
    """
    # The form fields select which listing is returned (the original comment
    # calls it the "topic simulation type"); their exact meaning is defined by
    # the site.
    req = urllib.request.Request(
        'http://www.ks5u.com/zhuantimoni/ashx/jinbang.ashx',
        data='xueke=1&shenfen=32'.encode('utf-8'),
    )
    with opener.open(req) as resp:
        html = resp.read().decode('utf-8')
    reg = r'<a href="(.+?)" target="_blank" title="(.+?)">'
    return re.findall(reg, html)


def getfile(id, name, url):
    """Download one document and save it as ``<name>.doc`` in the working dir.

    Args:
        id: Document id substituted into the download URL.
        name: Title of the document; used as the file name.
        url: Address of the page the link came from, sent as the Referer.
    """
    req = urllib.request.Request(
        'http://www.ks5u.com/USER/INC/Dpwnsch.asp?id=%s' % id
    )
    req.add_header('Referer', url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    )
    # Keep the payload as raw bytes: the file is opened in binary mode, and
    # decoding a .doc as UTF-8 would either fail or corrupt it.
    with opener.open(req) as resp:
        payload = resp.read()
    # `name` is already a str (it comes from re.findall on decoded text), so it
    # must not be decoded again. It is taken from remote HTML, so characters
    # that are path separators or invalid in file names are replaced.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
    with open(safe_name + '.doc', 'wb') as out:
        out.write(payload)


if LOGIN_MARKER in login():
    print('登錄成功')
else:
    print('登錄失敗')

for item in getlist():
    url = item[0]
    name = item[1]
    print(name)
    # The id is the last path segment of the link minus its final six
    # characters (presumably a ".shtml" suffix -- confirm against real links).
    doc_id = url.split('/')[-1][:-6]
    try:
        getfile(doc_id, name, url)
    except Exception as e:
        # Best-effort: one failed download must not stop the remaining ones,
        # but the cause is shown instead of being swallowed.
        print('下載失敗', e)
    else:
        # Pause between successful downloads, as the original did.
        time.sleep(2)

使用http.cookiejar帶cookie信息登錄爬取方法 -《狗嗨默示錄》-