1. 程式人生 > >Python學習筆記22(urllib模塊)

Python學習筆記22(urllib模塊)

ror 轉換 decode urllib模塊 one 處理 context hello mpat

Python3和Python2的urllib模塊不太一樣,本篇文章是以Python3為前提。

1.urlopen的使用

import urllib.request

urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None, capath=None, cadefault=False, context=None)

#url:需要抓取的網頁
#data:Post提交的數據。默認為空,使用的是get請求,若data有數據則是Post請求
#timeout:設置網站的訪問超時時間
import urllib.request

response = urllib.request.urlopen("http://www.baidu.com")
print(response.read().decode("utf-8"))
#response.read() 獲取的數據格式為bytes類型
#需要decode(),轉換成str類型
#POST請求

import urllib.parse
import urllib.request

data = bytes(urllib.parse.urlencode({"word": "hello"}), encoding="utf8")
response = urllib.request.urlopen("http://httpbin.org/post", data=data)
print(response.read())
#超時設置
import urllib.request

response = urllib.request.urlopen("http://httpbin.org/get", timeout=0.1)
print(response.read())

2.Request的使用

#get請求
import urllib.request

request = urllib.request.Request("https://python.org")
response = urllib.request.urlopen(request)
print(response.read().decode("utf-8"))

#post請求
from urllib import request, parse

url = "http://httpbin.org/post"
headers = {
    "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
    "Host": "httpbin.org"
}
dict = {
    "name": "Germey"
}
data = bytes(parse.urlencode(dict), encoding="utf8")
req = request.Request(url=url, data=data, headers=headers, method="POST")
response = request.urlopen(req)
print(response.read().decode("utf-8"))

3.代理

import urllib.request

proxy_handler = urllib.request.ProxyHandler({
    "http": "http://127.0.0.1:9743",
    "https": "https://127.0.0.1:9743"
})
opener = urllib.request.build_opener(proxy_handler)
response = opener.open("http://httpbin.org/get")
print(response.read().decode("utf-8"))

4.Cookie

#獲取cookie
import http.cookiejar, urllib.request

cookie = http.cookiejar.CookieJar()
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("http://www.baidu.com")
for item in cookie:
    print(item.name+"="+item.value)

#獲取cookie並且保存在文件中
#有兩種格式,記得哪種格式存的哪種格式讀就好
#格式一
import http.cookiejar, urllib.request
filename = "cookie.txt"
cookie = http.cookiejar.MozillaCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("http://www.baidu.com")
cookie.save(ignore_discard=True, ignore_expires=True)
#格式二
import http.cookiejar, urllib.request
filename = "cookie.txt"
cookie = http.cookiejar.LWPCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("http://www.baidu.com")
cookie.save(ignore_discard=True, ignore_expires=True)


#以格式二讀cookie,並且訪問url
import http.cookiejar, urllib.request
cookie = http.cookiejar.LWPCookieJar()
cookie.load("cookie.txt", ignore_discard=True, ignore_expires=True)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("http://www.baidu.com")
print(response.read().decode("utf-8"))

5.異常處理

#urllib.error有兩個錯誤類URLError和HTTPError,HTTPError是URLError的子類,所以一般先捕捉小的錯誤類,再捕捉大的錯誤類

from urllib import request, error

try:
    response = request.urlopen("http://cuiqingcai.com/index.htm")
except error.HTTPError as e:
    print(e.reason, e.code, e.headers, sep="\n")
except error.URLError as e:
    print(e.reason)
else:
    print("Request Successfully")

Python學習筆記22(urllib模塊)