1. 程式人生 > Python爬蟲-破解有道詞典(破解MD5的JS加密演算法)

Python爬蟲-破解有道詞典(破解MD5的JS加密演算法)

破解有道詞典

1.進行普通爬取嘗試:

 1 '''
 2 破解有道詞典
 3 V1
 4 '''
 5 
 6 from urllib import request, parse
 7 
 8 
 9 def youdao(key):
10 
11     url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
12 
13     data = {
14         "i": "boy",
15         "from":"AUTO",
16         "to": "AUTO
", 17 "smartresult": "dict", 18 "client": "fanyideskweb", 19 "salt": "1523100789519", 20 "sign": "b8a55a436686cd89873fa46514ccedbe", 21 "doctype": "json", 22 "version": "2.1", 23 "keyfrom": "fanyi.web", 24 "action":"FY_BY_REALTIME", 25 "
typoResult": "false" 26 } 27 28 # 引數data需要是bytes格式 29 data = parse.urlencode(data).encode() 30 31 headers = { 32 "Accept": "application/json,text/javascript,*/*;q=0.01", 33 #"Accept-Encoding": "gzip,deflate", 34 "Accept-Language": "zh-CN,zh;q=0.9
", 35 "Connection": "keep-alive", 36 "Content-Length": "200", 37 "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8", 38 "Cookie": "[email protected];JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517", 39 "Host": "fanyi.youdao.com", 40 "Origin": "http://fanyi.youdao.com", 41 "Referer": "http://fanyi.youdao.com/", 42 "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36 X-Requested-With: XMLHttpRequest" 43 } 44 45 req = request.Request(url=url, data=data, headers=headers) 46 47 rsp = request.urlopen(req) 48 49 html = rsp.read().decode() 50 print(html) 51 52 if __name__ == '__main__': 53 youdao("boy")

2.破解有道詞典的JS與MD5加密演算法,進行資料爬取(處理JS加密程式碼)

 1 '''
 2 V2
 3 處理js加密程式碼
 4 '''
 5 
 6 '''
 7 通過查詢,能找到js程式碼中操作程式碼
 8 
 9 1. 這個是計算salt的公式 r = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
10 2. sign: n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!");
11 md5一共需要四個引數,第一個和第四個都是固定值的字串,第三個是所謂的salt,第二個是。。。。。
12 第二個引數就是輸入的要查詢的單詞
13 
14 '''
15 
16 
def getSalt():
    '''
    Build the "salt" value the same way the page's JavaScript does.

    The JavaScript formula is:
        "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
    that is, the current Unix time in milliseconds plus a random integer
    in the range 0..9.

    :return: the salt as an int (callers turn it into text with str())
    '''
    import random
    import time

    # Math.random() is always below 1, so parseInt(10 * Math.random(), 10)
    # never reaches 10. random.randint includes both end points, hence the
    # upper bound has to be 9, not 10.
    return int(time.time() * 1000) + random.randint(0, 9)
28 
def getMD5(v):
    '''
    Return the hexadecimal MD5 digest of v.

    :param v: text (hashed as its UTF-8 encoding) or a bytes-like object
              (hashed as-is)
    :return: the digest as a 32-character lowercase hex string
    '''
    import hashlib

    # hashlib only accepts bytes-like input, so text is encoded first;
    # input that is already bytes is passed through untouched.
    data = v.encode("utf-8") if isinstance(v, str) else v

    return hashlib.md5(data).hexdigest()
39 
40 
def getSign(key, salt, client='fanyideskweb', secret="ebSeFb%=XZ%T[KZ)c(sy!"):
    '''
    Compute the "sign" form field for a translate request.

    Mirrors the page's JavaScript:
        n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!")
    where t is the word being looked up and r is the salt.

    :param key: the word to translate
    :param salt: the salt sent in the same request (int or str)
    :param client: client identifier prefixed to the signed string
    :param secret: fixed string appended to the signed string; taken from
                   the site's JavaScript, so it is a parameter in case the
                   site changes it
    :return: MD5 hex digest of client + key + salt + secret
    '''
    return getMD5(client + key + str(salt) + secret)
47 
48 from urllib import request, parse
49 
50 
def youdao(key):
    '''
    Look up key on the Youdao translate endpoint and print the raw response.

    Unlike a plain replay of a captured request, the "salt" and "sign" form
    fields are generated for every call with getSalt() and getSign().

    :param key: the word to translate
    :return: None; the form data and the response body are printed
    '''
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # The same salt must be used for the "salt" field and inside the sign
    salt = getSalt()

    data = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": str(salt),
        "sign": getSign(key, salt),
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false"
    }

    print(data)

    # The data argument of Request has to be bytes
    data = parse.urlencode(data).encode()

    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        # "Accept-Encoding: gzip,deflate" is left out: urllib does not
        # decompress response bodies, so a compressed reply could not be
        # decoded below.
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # Header values are text; the length is that of the encoded body
        "Content-Length": str(len(data)),
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        # Cookie copied from a browser session; substitute your own
        "Cookie": "[email protected];JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # Sent as its own header instead of being glued onto the User-Agent
        "X-Requested-With": "XMLHttpRequest"
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if read/decode fails
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()

    print(html)


if __name__ == '__main__':
    youdao("boy")

=========================

==================================

==================================

======================================

 ==========================================

結果示例:

JS程式碼格式化工具:

http://tool.oschina.net/codeformat/js