1. 程式人生 > >自學Python爬蟲(二)Requests庫的使用

自學Python爬蟲(二)Requests庫的使用

前言
Urllib和requests庫都是python3中傳送請求的庫,但是比較而言,Requests庫更加強大和易用,所以學習python3就不要學習urllib了,2020年python2的庫就不再更新,所以我們學習python3更有意義!

例項引入

import requests

# Fetch a page and inspect the basic attributes of the Response object.
response = requests.get('http://www.baidu.com')
print(type(response))
print(response.status_code)
print(type(response.text))
print(response.text)
# The call must stay on one line: a bare `print` followed by a
# parenthesised expression on the next line prints nothing.
print(response.cookies)

各種請求方式

import requests

requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')

請求

基本的GET請求
基本寫法

import requests

# Simplest possible GET request; printing the object shows its status.
response = requests.get('http://httpbin.org/get')
print(response)

帶引數的GET請求

# 第一種
import requests
response = requests.get('http://httpbin.org/get?name=will&age=20')
print(response.text)

# 第二種
import requests
data = {
    'name':'will',
    'age':20
}
response = requests.get('http://httpbin.org/get',params=data)
print(response.text)

解析Json

import requests

response = requests.get('http://httpbin.org/get')
# .text is the raw body as a str; .json() decodes that body into Python objects.
print(type(response.text))
print(response.json())
print(type(response.json()))

獲取二進位制資料

import requests

# Icon files use the ".ico" extension (".icon" was a typo in the URL).
response = requests.get('http://httpbin.org/favicon.ico')
# .text is the decoded str body, .content is the raw bytes body.
print(type(response.text), type(response.content))
print(response.text)
print(response.content)
import requests

response = requests.get('http://httpbin.org/favicon.ico')
# Write the raw bytes to disk. The `with` block closes the file on exit,
# so no explicit close() is needed. The file is saved as "favicon.ico",
# the same name the file-upload example later opens.
with open('favicon.ico', 'wb') as f:
    f.write(response.content)

新增headers

import requests

headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36'} 
response = requests.get('http://httpbin.org/explore',headers=headers)
print(response.text)

基本POST請求

import requests

data ={'name':'will','age':'20'}
response = requests.post('http://httpbin.org/post',data=data)
print(response.text)
import requests

data = {'name': 'will', 'age': '20'}
# The dict must be bound to the same name that is passed as `headers=` below.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36'
}
response = requests.post('http://httpbin.org/post', data=data, headers=headers)
print(response.json())

響應

response屬性

import requests

response = requests.get('http://www.jianshu.com')
print(type(response.status_code),response.status_code)
print(type(response.headers),response.headers)
print(type(response.cookies),response.cookies)
print(type(response.url),response.url)
print(type(response.history),response.history)

狀態碼判斷

import requests

response = requests.get('http://www.jianshu.com')
if response.status_code == 200:
    print('Request Successfully!')
exit()

狀態碼:(連結:http://tools.jb51.net/table/http_status_code)
這裡寫圖片描述

高階操作

檔案上傳

import requests

# Open the file in binary mode inside a `with` block so the handle is
# closed once the upload has been sent.
with open('favicon.ico', 'rb') as f:
    files = {'file': f}
    response = requests.post('http://httpbin.org/post', files=files)
print(response.text)

獲取cookie

import requests

response = requests.get('http://www.baidu.com')
print(response.cookies)
# Iterate over the cookie jar as name/value pairs.
for key, value in response.cookies.items():
    print(key + '=' + value)

會話維持

import requests

# Two independent requests.get() calls do not share cookies, so the cookie
# set by the first call is not sent with the second one.
requests.get('http://httpbin.org/cookies/set/number/123456789')
response = requests.get('http://httpbin.org/cookies')
print(response.text)

上下對比(要用Session哦)

import requests

s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456789')
response = s.get('http://httpbin.org/cookies')
print(response.text)

證書驗證

import requests

response = requests.get('https://www.12306.cn')
print(response.status_code)
import requests

response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)

代理設定

import requests

proxies = {
    "http":"http://127.0.0.1:9743",
    "https":"https://127.0.0.1:9743",
}

response = requests.get('http://www.baidu.com',proxies=proxies)
print(response.status_code)

超時設定

import requests

response = requests.get('http://httpbin.org/get',timeout=0.1)
print(response.status_code)

認證設定

import requests
from requests.auth import HTTPBasicAuth

# `auth` is a keyword argument of requests.get(), separated from the URL by a comma.
r = requests.get('http://120.27.34.24:9001', auth=HTTPBasicAuth('user', '123'))
print(r.status_code)
import requests

# A (username, password) tuple is shorthand for HTTP Basic authentication.
r = requests.get('http://120.27.34.24:9001', auth=('user', '123'))
print(r.status_code)

異常處理

import requests
from requests.exceptions import ReadTimeout, HTTPError, RequestException

# Handlers go from most specific to most general: ReadTimeout and HTTPError
# are both subclasses of RequestException, so the base class is caught last.
try:
    response = requests.get('http://httpbin.org/get', timeout=0.5)
    print(response.status_code)
except ReadTimeout:
    print('TimeOut')
except HTTPError:
    print('Http error')
except RequestException:
    print('error')