1. 程式人生 > python第三方庫requests

python第三方庫requests

1.請求分析

"""requests庫的應用。requests是比python內建urllib庫更好的http請求庫"""
import requests
from requests.models import Response


# Fetch a page with GET; requests.post / delete / patch etc. are called the same way.
res = requests.get("https://www.baidu.com/")
print(type(res))        # <class 'requests.models.Response'>
print(res.status_code)  # status code of the response; 200 means success
print(type(res.text))   # str
print(res.text)         # body of the response as text
print(res.cookies)      # e.g. <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>

# Query parameters can be passed as a dict through ``params``:
# req = requests.get("http://httpbin.org/get", params=data)
req = requests.get("http://httpbin.org/get")
print(req.headers)      # header information returned with the response

# NOTE(review): the sample below has the shape of the JSON body that httpbin
# returns (i.e. req.text), not of req.headers -- confirm which print was meant.
"""
{
  "args": {                 # query parameters of the requested URL show up here
  },
  "headers": {              # request headers as seen by the server
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "close",  # connection closed
    "Host": "httpbin.org",
    "User-Agent": "python-requests/2.20.1"  # requests' own value when no headers are set
  },
  "origin": "223.73.146.13",
  "url": "http://httpbin.org/get"
}
"""

# Get the JSON body parsed directly into a dict.
rew = requests.get("http://httpbin.org/get")
print(type(rew.text))   # str
print(rew.json())       # Response.json() converts the JSON string into a dict
print(type(rew.json())) # <class 'dict'>
View Code

2.POST請求

"""requests的POST請求和檔案上傳"""
import requests


# Form fields for the POST body; the sample below shows them arriving
# under "form" with a urlencoded Content-Type.
post_url = "http://httpbin.org/post"
data = dict(name="Jim", age="26")
res = requests.post(post_url, data=data)
print(res.text)


"""
{
  "args": {},
  "data": "",
  "files": {},
  "form": {         # the fields sent in the POST request
    "age": "26",
    "name": "Jim"
  },
  "headers": {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "close",
    "Content-Length": "15",
    "Content-Type": "application/x-www-form-urlencoded",    # type of the request data
    "Host": "httpbin.org",
    "User-Agent": "python-requests/2.20.1"
  },
  "json": null,
  "origin": "223.73.146.12",
  "url": "http://httpbin.org/post"
}
"""


# Upload a file as multipart/form-data. The file is opened in a ``with``
# block so the handle is closed as soon as the request has been sent
# (the bare open() used previously was never closed).
with open("favicon.ico", "rb") as upload:
    files = {"file": upload}
    ren = requests.post("http://httpbin.org/post", files=files)
print(ren.text)


"""
{
  "args": {},
  "data": "",
  "files": {            # uploaded file content is reported in its own "files" field
    "file": "data:application/octet-stream;base64"
  },
  "form": {},           # empty form
  "headers": {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "close",
    "Content-Length": "6665",
    # content type used for the file upload
    "Content-Type": "multipart/form-data; boundary=717f83c70b6a0c14c53c66a11897cc72",
    "Host": "httpbin.org",
    "User-Agent": "python-requests/2.20.1"
  },
  "json": null,
  "origin": "223.73.146.12",
  "url": "http://httpbin.org/post"
}
"""
View Code

3.requests獲取多媒體資料

"""獲取多媒體二進位制資料"""
import requests


# Download binary media data, here a site icon.
tag = requests.get("https://github.githubassets.com/favicon.ico")
print(tag.text)     # binary data cannot be shown as text; prints as garbage
icon_bytes = tag.content
print(icon_bytes)   # bytes value, displayed with a leading b
# The with-statement closes the file automatically once the block is done;
# the object it manages must provide __enter__ and __exit__.
with open("favicon.ico", "wb") as icon_file:
    # icon_file is the open file handle; writing through it stores the icon
    # in the current directory.
    icon_file.write(icon_bytes)
View Code

4.cookies的應用

"""requests中cookies的應用"""
import requests


headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36"
}
res = requests.get("https://www.baidu.com", headers=headers)
cookie_jar = res.cookies
print(cookie_jar)
# items() gives a list of (name, value) tuples; print one "name=value" line per cookie.
for pair in cookie_jar.items():
    print("=".join(pair))


"""
# type: RequestsCookieJar
<RequestsCookieJar[<Cookie BAIDUID=CE10743D4DCCA7E8EAE505F60EBD3992:FG=1 for .baidu.com/>, <Cookie BIDUPSID=CE10743D4DCCA7E8EAE505F60EBD3992 for .baidu.com/>, <Cookie H_PS_PSSID=26523_1450_21091_28206_28132_27750_28140_27508 for .baidu.com/>, <Cookie PSTM=1546168944 for .baidu.com/>, <Cookie delPer=0 for .baidu.com/>, <Cookie BDSVRTM=0 for www.baidu.com/>, <Cookie BD_HOME=0 for www.baidu.com/>]>
BAIDUID=CE10743D4DCCA7E8EAE505F60EBD3992:FG=1
BIDUPSID=CE10743D4DCCA7E8EAE505F60EBD3992
H_PS_PSSID=26523_1450_21091_28206_28132_27750_28140_27508
PSTM=1546168944
delPer=0
BDSVRTM=0
BD_HOME=0
"""


"""requests庫利用cookies請求知乎的兩種方式"""
import requests
from requests.cookies import RequestsCookieJar

"""
# 常用
headers = {
    "cookie": 'q_c1=c5dafb4029a94bd28bab31916755a82a|1546093256000|1546093256000; r_cap_id="NzJjY2ViMWQ2MWQ2NGNmYmE5ZjJjYjA4MmI4OTlmYjM=|1546093256|4fdca7ef25aee38df8b6895fe0e6870b6c4157d7"; cap_id="NTIxY2Q0NWY2NGYzNDMwYjhmZWJiNzkxY2YxNzMxNTE=|1546093256|98ea49da057eacfa5979f20ebc86610106421e92"; l_cap_id="M2JlYTFhZmQzOWRjNDE1NzhmM2VjYjA2YWFmNGViNzI=|1546093256|321a5802717da27fc618ba74eb0dcb928d037974"; d_c0="AODhnmWpvg6PTmDEXzC_5yO9xS5KsJL3g1c=|1546093259"; _xsrf=T3PdhoFZCjirABeT1olJ9o2Ju1WH5PQP; _zap=492572c4-fbf2-48d2-9ac9-ec7ee95a9154; tgw_l7_route=931b604f0432b1e60014973b6cd4c7bc; capsion_ticket="2|1:0|10:1546169369|14:capsion_ticket|44:MWJhZDk3YTYxYTA5NDkxZjhlYWUwODRlNDliY2Y4Mjc=|a303a846033bf574b0bdd3677218f5bb49a56b00890aa86a7a9b0ac7c6b53985"; z_c0="2|1:0|10:1546169409|4:z_c0|92:Mi4xVEljZkJBQUFBQUFBNE9HZVphbS1EaVlBQUFCZ0FsVk5RZm9WWFFCdTAxTV94cHpiQ0xUV2dBRC16RmpacE4xZXp3|e24885ab0706d33abc6e7f45f2eb74769d88bc15366d945d57366efe8f5856a0"; unlock_ticket="AJBCprKETQsmAAAAYAJVTUmzKFwvSS6hhmi4BuDGUgm1G6fZBYhVXw=="; tst=r',
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36"
}
res = requests.get("https://www.zhihu.com", headers=headers)
print(res.text)
"""

# 比較繁瑣
cookies = 'q_c1=c5dafb4029a94bd28bab31916755a82a|1546093256000|1546093256000; r_cap_id="NzJjY2ViMWQ2MWQ2NGNmYmE5ZjJjYjA4MmI4OTlmYjM=|1546093256|4fdca7ef25aee38df8b6895fe0e6870b6c4157d7"; cap_id="NTIxY2Q0NWY2NGYzNDMwYjhmZWJiNzkxY2YxNzMxNTE=|1546093256|98ea49da057eacfa5979f20ebc86610106421e92"; l_cap_id="M2JlYTFhZmQzOWRjNDE1NzhmM2VjYjA2YWFmNGViNzI=|1546093256|321a5802717da27fc618ba74eb0dcb928d037974"; d_c0="AODhnmWpvg6PTmDEXzC_5yO9xS5KsJL3g1c=|1546093259"; _xsrf=T3PdhoFZCjirABeT1olJ9o2Ju1WH5PQP; _zap=492572c4-fbf2-48d2-9ac9-ec7ee95a9154; tgw_l7_route=931b604f0432b1e60014973b6cd4c7bc; capsion_ticket="2|1:0|10:1546169369|14:capsion_ticket|44:MWJhZDk3YTYxYTA5NDkxZjhlYWUwODRlNDliY2Y4Mjc=|a303a846033bf574b0bdd3677218f5bb49a56b00890aa86a7a9b0ac7c6b53985"; z_c0="2|1:0|10:1546169409|4:z_c0|92:Mi4xVEljZkJBQUFBQUFBNE9HZVphbS1EaVlBQUFCZ0FsVk5RZm9WWFFCdTAxTV94cHpiQ0xUV2dBRC16RmpacE4xZXp3|e24885ab0706d33abc6e7f45f2eb74769d88bc15366d945d57366efe8f5856a0"; unlock_ticket="AJBCprKETQsmAAAAYAJVTUmzKFwvSS6hhmi4BuDGUgm1G6fZBYhVXw=="; tst=r'
# Build a cookie jar by hand from the raw "name=value; name=value" string
# held in ``cookies``.
# NOTE(review): that string looks like a real logged-in session; live
# credentials should not be kept in source files.
jar = requests.cookies.RequestsCookieJar()
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36"
}
for cookie in cookies.split(";"):
    # Pairs are separated by "; ", so every fragment after the first starts
    # with a space; strip it so it does not become part of the cookie name.
    cookie = cookie.strip()
    if not cookie:
        # Tolerate a trailing ";" or doubled separators.
        continue
    print(type(cookie))     # str
    # Split on the first "=" only: cookie values may contain "=" themselves
    # (e.g. base64 padding).
    key, _, value = cookie.partition("=")
    jar.set(key, value)     # register the cookie in the jar
# Send the request carrying both the cookies and the headers.
res = requests.get("https://www.zhihu.com", cookies=jar, headers=headers)
print(res.text)
View Code

5.簡單的實戰作用

"""requests配合re正則實戰解析"""
import requests
import re


headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36"
}
url = "https://www.zhihu.com/explore"
# Fetch the page.
res = requests.get(url=url, headers=headers)
page_html = res.text
# Compile the pattern once. ``.*?`` matches non-greedily (as little as
# possible) and re.S lets ``.`` match newlines as well.
content_tag = re.compile("explore-feed.*?question_link.*?>(.*?)</a>", re.S)
# Collect the captured group of every match.
title = content_tag.findall(page_html)
print(title)
View Code

6.代理設定

"""requests的代理設定proxies和socks。同理可以新增超時設定timeout,
因為請求分為連線和讀取,也可以接收一個元祖傳引數。此外還可以做登陸認證,
直接使用auth認證。
"""
import requests


# Basic HTTP proxy.
# "user", "password" and "host" are placeholders (the credentials part of the
# published URL had been mangled); replace them with real values before running.
proxy_url = "http://user:password@host:3780"
# Proxies are selected by the scheme of the *target* URL, so an "https" entry
# is needed for the https request below; with only "http" the proxy is skipped.
proxies = {
    "http": proxy_url,
    "https": proxy_url,
}
# timeout=1 applies the same limit to connecting and reading;
# auth=(user, password) enables HTTP basic authentication.
res = requests.get("https://www.taobao.com", proxies=proxies, timeout=1, auth=("username", "password"))
print(res.text)

# SOCKS proxy. Needs the optional SOCKS support (pip install "requests[socks]").
# "host" and "port" are placeholders as well.
proxies1 = {
    "https": "socks5://user:password@host:port"
}
# A tuple timeout must be exactly (connect, read); requests raises ValueError
# for any other length, so the former three-element tuple could never work.
ren = requests.get("https://www.taobao.com", proxies=proxies1, timeout=(1, 2))
print(ren.text)
View Code

7.會話維持

"""requests當中的session會話維持"""
import requests


# Create a session object; used as a context manager so its pooled
# connections are released when the block ends.
with requests.Session() as ses:
    # Request a URL through the session so that a cookie gets set on it.
    ses.get("http://httpbin.org/cookies/set/number/1234")
    # A second request through the same session reads back the cookies that
    # were set by the first one.
    res = ses.get("http://httpbin.org/cookies")
print(res.text)
View Code