1. 程式人生 > >微信網頁版登入爬蟲小案例

微信網頁版登入爬蟲小案例

微信登入爬蟲小案例,使用的抓包工具是fiddler

import requests
import re
from PIL import Image
import urllib3
urllib3.disable_warnings()
from bs4 import  BeautifulSoup

import  json

#1.獲取uuid  為掃碼的連結尋找引數
#2.獲取二維碼
#3.掃描二維碼的時候持續傳送請求的連結https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=obw7uFJYPw==&tip=0&r=-527204269&_=1520945398715
class WeChat():
    """Small demo client that logs in to WeChat Web by scanning a QR code.

    The flow is: fetch a login uuid, download and display the QR code for
    that uuid, poll the login endpoint until the phone confirms, fetch the
    session credentials from the redirect URL, then call ``webwxinit`` to
    retrieve the account's initial data.
    """

    def __init__(self):
        self.session = requests.session()
        # Certificate verification is switched off for every request made
        # through this session (presumably so traffic can pass through an
        # intercepting proxy such as Fiddler -- confirm before reuse).
        self.session.verify = False

    def getUuid(self):
        """Request a login uuid, store it on ``self.uuid`` and return it.

        Raises:
            RuntimeError: if the response does not contain a uuid.
        """
        url = "https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_=1520933268366"
        response = self.session.get(url)
        matches = re.findall(r'uuid = "(.*?)"', response.text)
        if not matches:
            raise RuntimeError(
                "no uuid found in jslogin response: {!r}".format(response.text)
            )
        self.uuid = matches[0]
        print(self.uuid)
        return self.uuid

    def get_code(self):
        """Download the QR code for ``self.uuid`` to ``wxcode.png`` and show it."""
        url = "https://login.weixin.qq.com/qrcode/{}".format(self.uuid)
        response = self.session.get(url)
        # The raw PNG bytes are intentionally not printed: they are binary
        # noise on a console and carry no useful information.
        with open('wxcode.png', 'wb') as f:
            f.write(response.content)
        image = Image.open('wxcode.png')
        image.show()

    def login(self):
        """Poll the login endpoint until it returns the redirect URL.

        The result is stored on ``self.redirect_url``. Every response that
        is not yet a successful login is printed and the request repeated.
        """
        url = "https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={}&tip=0&r=-527204269&_=1520945398715".format(self.uuid)
        while True:
            response = self.session.get(url)
            text = response.text
            # A bare "'200' in text" check can be true for a response that
            # merely contains the digits 200 somewhere in its payload, so
            # success additionally requires the redirect_uri to be present
            # instead of indexing into an empty match list.
            redirect = re.findall(r'redirect_uri="(.*?)"', text)
            if '200' in text and redirect:
                self.redirect_url = redirect[0]
                break
            print(text)

    def login_parse(self):
        """Fetch the redirect URL and extract the session credentials.

        Sent after the user taps "log in" on the phone. The response is an
        XML document; its fields are the parameters needed by the next
        request. Example response seen by the original author:

            <error>
              <ret>0</ret>
              <message></message>
              <skey>@crypt_b78dd979_ba51a26273893cc5fd471e8a5bf59f44</skey>
              <wxsid>9sdRG7Oy8VMI1gEr</wxsid>
              <wxuin>2359397621</wxuin>
              <pass_ticket>CrvVhVc84VwMpTeSvUgmAkZ0TVLHqfw%2BuNvJOu3tLhXmSrG7BEM9n%2BrLeqWG9Fh0</pass_ticket>
              <isgrayscale>1</isgrayscale>
            </error>

        Sets ``self.skey``, ``self.wxsid``, ``self.wxuin``,
        ``self.pass_ticket`` and ``self.isgrayscale``.

        Raises:
            RuntimeError: if any of those tags is missing from the response.
        """
        # Redirects are not followed so that the body of this very response
        # (the XML above) is what gets parsed.
        response = self.session.get(self.redirect_url, allow_redirects=False)
        print(response.history)
        print(response.status_code)
        soup = BeautifulSoup(response.text, 'lxml')
        for tag_name in ('skey', 'wxsid', 'wxuin', 'pass_ticket', 'isgrayscale'):
            tag = soup.find(tag_name)
            if tag is None:
                raise RuntimeError(
                    "<{}> missing from login response: {!r}".format(
                        tag_name, response.text
                    )
                )
            setattr(self, tag_name, tag.text)
        print(self.skey)
        print(response.text)

    def login_in(self):
        """Call ``webwxinit`` and print the account's initial data.

        Per the original author, the response carries the user's details
        (friends, official accounts, and so on).
        """
        # Use the pass_ticket obtained for THIS session in login_parse();
        # a ticket hard-coded from an earlier capture belongs to another
        # session.
        url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=-603124408&pass_ticket={}".format(self.pass_ticket)
        data = {
            "BaseRequest": {
                "Uin": self.wxuin,
                "Sid": self.wxsid,
                "Skey": self.skey,
                "DeviceID": "e547323813630142",
            }
        }
        # The endpoint expects a JSON body, so the dict is serialized
        # explicitly rather than sent form-encoded.
        response = self.session.post(url, data=json.dumps(data))
        response.encoding = "utf8"
        print(response.status_code)
        print(response.text)

    def main(self):
        """Run the whole login flow in order."""
        # getUuid() already prints the uuid; printing its return value here
        # used to emit a stray "None".
        self.getUuid()
        self.get_code()
        self.login()
        self.login_parse()
        self.login_in()


if __name__ == "__main__":
    wechat = WeChat()
    wechat.main()

總結:

  • requests預設會自動跟隨301跳轉,這樣拿到的是跳轉之後的響應,而不是我們要的資訊,所以有時候需要設定allow_redirects=False。(不確定是否發生過301跳轉時,檢視response.history即可知道。)如果設定了allow_redirects=False,response.status_code就會顯示301。
  • post傳送JSON引數時,要先用json.dumps()把字典序列化成JSON字串,再作為data傳入。
  • 微信掃碼這一步還必須要人工,不知道是否能用機器解決。

有任何疑問請評論留言。