1. 程式人生 > >python3 模擬POST請求時 “Content-Type”指定編碼會消失的問題

python3 模擬POST請求時 “Content-Type”指定編碼會消失的問題

發送HTTP請求時,若伺服器預設使用GBK編碼、而客戶端(瀏覽器)使用UTF-8編碼,請求中又沒有指定編碼格式,就會出現亂碼。
python3 模擬POST請求時 “Content-Type”指定編碼會消失的問題

如下程式碼所示
在外部指定的 "Content-Type" 為 "application/x-www-form-urlencoded; charset=UTF-8",但是抓包後發現 charset=UTF-8 消失了,實際送出的變成了 "Content-Type: application/x-www-form-urlencoded",導致伺服器用GBK解碼,致使亂碼。

import gzip
import http.cookiejar
import json
import urllib.parse
import urllib.request
import zlib


def ungzip(data):
    """Return *data* gunzipped, or unchanged when it is not gzip data.

    Args:
        data: Raw response body as bytes.

    Returns:
        The decompressed bytes, or *data* itself if decompression fails
        because the payload was not (valid) gzip.
    """
    try:
        return gzip.decompress(data)
    except (OSError, EOFError, zlib.error):
        # The payload was not compressed, so there is nothing to decompress.
        return data


def getOpener(head):
    """Build a cookie-aware opener whose default headers come from *head*.

    Args:
        head: Mapping of header name to header value.

    Returns:
        An opener with an ``HTTPCookieProcessor`` installed and
        ``addheaders`` set to the ``(name, value)`` pairs of *head*.
    """
    cookie_jar = http.cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(cookie_handler)
    opener.addheaders = list(head.items())
    return opener


header = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    # ... other header
}
# The headers are handed to the opener only (opener.addheaders), not to a
# Request object. This is the setup that reproduces the lost-charset problem.
opener = getOpener(header)

postDict = {
    "sql": "select '你好' from dual",
    "pageNum": "1",
    "pageSize": "100",
}
# str.encode() defaults to UTF-8, so the form body is sent as UTF-8 bytes.
postData = urllib.parse.urlencode(postDict).encode()

# NOTE: `url` and `timeout` are not defined in this snippet; they are
# expected to be provided by the surrounding script.
op = opener.open(url, postData, timeout=timeout)
response = op.read()
response = ungzip(response)
response = response.decode()
response = json.loads(response)

這裡寫圖片描述

分析除錯後發現:用上述方式指定的 "Content-Type" 會被預設值 "application/x-www-form-urlencoded" 取代。
具體原始碼分析如下,見 urllib.request.AbstractHTTPHandler.do_request_():

def do_request_(self, request):
    """Fill in the default headers of an outgoing HTTP request.

    Excerpt of ``urllib.request.AbstractHTTPHandler.do_request_``, annotated
    to show why a ``Content-Type`` supplied only through
    ``opener.addheaders`` never reaches the server on a POST.

    Args:
        request: The request being prepared; it is modified in place.

    Returns:
        The same *request* object, with any missing ``Content-type``,
        ``Content-length``, ``Host`` and opener-level headers added.

    Raises:
        URLError: If the request has no host.
        TypeError: If the POST data is a ``str``.
        ValueError: If the POST data is a non-buffer iterable and no
            ``Content-length`` header was supplied.
    """
    host = request.host
    if not host:
        raise URLError('no host given')

    if request.data is not None:  # POST
        data = request.data
        if isinstance(data, str):
            msg = "POST data should be bytes or an iterable of bytes. " \
                  "It cannot be of type str."
            raise TypeError(msg)
        # label_A: unless Content-type was set directly on the request
        # object, the default value is installed here.
        if not request.has_header('Content-type'):
            request.add_unredirected_header(
                'Content-type',
                'application/x-www-form-urlencoded')
        if not request.has_header('Content-length'):
            try:
                mv = memoryview(data)
            except TypeError:
                # The ``collections.Iterable`` alias was removed in
                # Python 3.10; the ABC lives in ``collections.abc``.
                from collections.abc import Iterable
                if isinstance(data, Iterable):
                    raise ValueError("Content-Length should be specified "
                            "for iterable data of type %r %r" % (type(data),
                            data))
            else:
                request.add_unredirected_header(
                        'Content-length', '%d' % (len(mv) * mv.itemsize))

    sel_host = host
    if request.has_proxy():
        scheme, sel = splittype(request.selector)
        sel_host, sel_path = splithost(sel)
    if not request.has_header('Host'):
        request.add_unredirected_header('Host', sel_host)
    for name, value in self.parent.addheaders:
        name = name.capitalize()
        # For a POST that went through label_A above, the request already
        # has a Content-type, so the opener-level value is NOT applied here.
        if not request.has_header(name):
            request.add_unredirected_header(name, value)

    return request

解決方案
方案1、把原始碼中 for name, value in self.parent.addheaders 迴圈裡的 if not request.has_header(name) 判斷去掉就OK啦(但這需要修改標準庫原始碼,不建議)。
方案2、因為 opener.open 的 url 參數可以是字串,也可以是 Request 物件,直接傳入攜帶 header 的 Request 物件就OK啦(推薦):

# Before (headers lived only on the opener, so the charset was dropped):
#     op = opener.open(url, postData, timeout=timeout)
# After: wrap the URL in a Request that carries the headers itself, so the
# Content-Type (including its charset) is already present on the request.
op = opener.open(
    urllib.request.Request(url, headers=header),
    postData,
    timeout=timeout,
)