1. 程式人生 > 利用爬蟲技術,仿有道翻譯小案例

利用爬蟲技術,仿有道翻譯小案例

import requests
import time
import hashlib
import json

# The request URL must be the route the page posts to after "translate" is
# clicked, not the landing page itself.
url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

# Between two visits the form fields that change are ``salt``, ``sign`` and
# ``i`` (the query text). ``salt`` and ``sign`` are rebuilt below the same way
# the page's JavaScript builds them: prefix + text + salt + suffix, MD5-hashed.
SIGN_PREFIX = "fanyideskweb"
# NOTE(review): the "[email protected]" fragment looks like an e-mail
# obfuscation placeholder introduced when this listing was published. The
# literal is kept unchanged here; confirm the real suffix against the site's
# JavaScript before relying on the signature.
SIGN_SUFFIX = "sr_3(QOHT)L2dx#[email protected]"

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def make_sign(text: str, salt: int) -> str:
    """Return the hex MD5 signature sent in the ``sign`` form field.

    Args:
        text: The text to translate.
        salt: The millisecond timestamp also sent as the ``salt`` field.

    Returns:
        The 32-character hexadecimal MD5 digest of
        ``SIGN_PREFIX + text + str(salt) + SIGN_SUFFIX``.
    """
    raw = SIGN_PREFIX + text + str(salt) + SIGN_SUFFIX
    return hashlib.md5(raw.encode('utf-8')).hexdigest()


def build_form_data(text: str, salt: int) -> dict:
    """Return the POST form for translating *text* with the given *salt*."""
    return {
        "i": text,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": make_sign(text, salt),
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        # NOTE(review): spelling kept exactly as in the original listing.
        "action": "FY_BY_CLICKBUTTION",
        "typoResult": "false",
    }


def build_headers(salt: int) -> dict:
    """Return the request headers; *salt* is embedded at the end of the cookie."""
    return {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # "Content-Length" was commented out in the original listing and is
        # deliberately not set by hand here.
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        # The part of the cookie that changes is the timestamp (starting with
        # 154 at the time the original was written).
        # NOTE(review): "[email protected]" looks like a publishing
        # placeholder rather than a real cookie value; kept unchanged.
        "Cookie": "[email protected]; JSESSIONID=aaayla8sm5ouFaBpGxBCw; OUTFOX_SEARCH_USER_ID_NCOO=702230926.9325526; ___rl__test__cookies={}".format(
            salt),
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Pragma": "no-cache",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }


def extract_translation(result: dict) -> str:
    """Return the translated text contained in a decoded response.

    ``result['translateResult']`` is read as a list of lists of dicts, each
    carrying the translated piece under ``'tgt'``. Every piece is used, not
    only ``[0][0]``: pieces of one inner list are concatenated and the inner
    lists are joined with newlines.

    Raises:
        ValueError: If the response has no (or an empty) ``translateResult``.
    """
    groups = result.get('translateResult')
    if not groups:
        raise ValueError(
            "response contains no translateResult: {!r}".format(result))
    return '\n'.join(
        ''.join(piece['tgt'] for piece in group) for group in groups
    )


def main() -> None:
    """Prompt for text, submit it for translation and print the result."""
    text = input('請輸入你想要翻譯的內容:')
    salt = int(time.time() * 1000)
    print(salt)

    # Submit the form together with the request headers.
    response = requests.post(
        url,
        headers=build_headers(salt),
        data=build_form_data(text, salt),
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    content = response.content.decode()
    print(content)

    # json.loads turns the JSON string into a dict.
    res = json.loads(content)
    print(res)
    translation = extract_translation(res)
    print(res['translateResult'])
    print('你的查詢結果為:', translation)


if __name__ == '__main__':
    main()