1. 程式人生 > Python3.4 12306 2015年3月驗證碼識別

Python3.4 12306 2015年3月驗證碼識別

like target bottom edr ocr extra spl apple creat

import ssl
import json
from PIL import Image
import requests
import re
import urllib.request as urllib2
# SECURITY: this replaces ssl.create_default_context with the *unverified*
# context factory for the whole process, so any code that builds its TLS
# context through ssl.create_default_context() will skip certificate checks.
# NOTE(review): urllib's HTTPS handler may consult
# ssl._create_default_https_context rather than ssl.create_default_context;
# confirm this patch actually affects the urlopen() calls in this script.
if hasattr(ssl, '_create_unverified_context'):
    ssl.create_default_context = ssl._create_unverified_context

# Browser-like User-Agent sent with the Baidu image-search requests.
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"

# 12306 login captcha endpoint; the image is fetched from here by get_img().
pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"
def imgCut():
    """Crop the caption strip out of ``./tmp.jpg`` and save it as ``./text.jpg``.

    The cropped region is the fixed box (left=120, top=0, right=290,
    bottom=25) of the previously downloaded captcha image. The result is
    written to disk; nothing is returned.
    """
    pic_obj = Image.open('./tmp.jpg')
    box = (120, 0, 290, 25)
    region = pic_obj.crop(box)
    region.save('./text.jpg')
def ocrApi():
    """Send ``./text.jpg`` to the docs88 conversion service and return its text.

    The image is uploaded as a multipart form to ``pdftowordupload2.php``
    with ``desttype=txt``. The response body is then turned into a URL on
    the same host, and that URL is downloaded and decoded.

    Returns:
        str: the decoded body of the result URL.
    """
    filename = './text.jpg'
    upload_pic_url = "http://cn.docs88.com/pdftowordupload2.php"
    filename_tmp = filename.split('/')[-1]
    headers_fake = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Connection': 'keep-alive',
        'Host': 'cn.docs88.com',
        'Origin': 'http://cn.docs88.com',
        'User-Agent': 'Mozilla/5.0 (KHTML, like Gecko) Chrome/41.0.2272.89',
        'X-Requested-With': 'ShockwaveFlash/17.0.0.134',
    }
    para = {
        'Filename': filename_tmp,
        'sourcename': filename_tmp,
        'sourcelanguage': 'cn',
        'desttype': 'txt',
        'Upload': 'Submit Query',
    }
    # Keep the file open only for the duration of the upload.
    with open(filename, 'rb') as fp:
        upload_pic = requests.post(
            upload_pic_url,
            data=para,
            files={"Filedata": fp},
            headers=headers_fake,
        )

    # str(bytes) renders as "b'...'", so [5:-1] drops the "b'" prefix, the
    # first three characters of the response body, and the closing quote.
    # NOTE(review): presumably the body starts with a 3-character prefix
    # before the relative result path; confirm against a live response.
    text_result_url = 'http://cn.docs88.com/' + str(upload_pic.content)[5:-1]
    with urllib2.urlopen(text_result_url) as resp:
        text_result = resp.read().decode()
    return text_result
def get_img():
    """Download the captcha from ``pic_url``, store it, and open it.

    The raw bytes are written to ``./tmp.jpg`` (overwriting any earlier
    file) so that other steps can read the same image from disk.

    Returns:
        The image opened from ``./tmp.jpg`` via ``Image.open``.
    """
    # Close the HTTP response as soon as the payload has been read.
    with urllib2.urlopen(pic_url) as resp:
        raw = resp.read()
    with open('./tmp.jpg', 'wb') as fp:
        fp.write(raw)
    return Image.open('./tmp.jpg')
def get_sub_img(im, x, y):
    """Crop one tile of the captcha grid out of ``im``.

    Args:
        im: object exposing ``crop((left, top, right, bottom))``.
        x: column index; asserted to lie in 0..3.
        y: row index; asserted to lie in 0..2.

    Returns:
        Whatever ``im.crop`` returns for the computed box.
    """
    assert 0 <= x <= 3
    assert 0 <= y <= 2
    # Each tile is cropped as a 67x67 square. Tiles repeat every 72 px
    # (67 + 5), and the grid starts at offset (5, 41) inside the image.
    tile = 67
    pitch = tile + 5
    x0 = 5 + pitch * x
    y0 = 41 + pitch * y
    return im.crop((x0, y0, x0 + tile, y0 + tile))
def baidu_stu_lookup(im):
    """Run a Baidu reverse-image search for ``im`` and return its keywords.

    The image is saved to ``./query_temp_img.png``, uploaded as a raw PNG
    body, and the response of that upload is passed (URL-quoted) as the
    ``queryImageUrl`` of a second search request. The resulting HTML is
    handed to ``baidu_stu_html_extract``.

    Args:
        im: object exposing ``save(path)``, e.g. a cropped captcha tile.

    Returns:
        str: keywords joined with ``|``, or ``'[UNKOWN]'`` when none found.
    """
    url = "http://stu.baidu.com/n/image?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
    im.save("./query_temp_img.png")
    with open("./query_temp_img.png", 'rb') as fp:
        raw = fp.read()
    # The upload endpoint takes the payload size as the final query value.
    url = url + str(len(raw))
    req = urllib2.Request(url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        resp_url = resp.read()
    url = "http://stu.baidu.com/n/searchpc?queryImageUrl=" + urllib2.quote(resp_url)
    req = urllib2.Request(url, headers={'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        html = resp.read().decode()
    return baidu_stu_html_extract(html)


def baidu_stu_html_extract(html):
    """Pull the keyword list out of a Baidu image-search result page.

    Looks for the first ``keywords:'...'`` fragment, un-escapes ``\\x22``
    to ``"`` and doubled backslashes to single ones, parses the result as
    JSON, and joins every item's ``'keyword'`` value with ``|``.

    Args:
        html: page source as text.

    Returns:
        str: ``'kw1|kw2|...'``, or ``'[UNKOWN]'`` if the fragment is
        missing or the parsed list is empty.
    """
    pattern = re.compile(r"keywords:'(.*?)'")
    matches = pattern.findall(html)
    if not matches:
        return '[UNKOWN]'
    json_str = matches[0]
    json_str = json_str.replace('\\x22', '"').replace('\\\\', '\\')
    result = [item['keyword'] for item in json.loads(json_str)]
    return '|'.join(result) if result else '[UNKOWN]'


def _match_candidates(captcha_text, tile_keywords):
    """Return 'index --- keywords' lines for tiles sharing a caption character.

    Whitespace in the caption is ignored so that a stray space or newline
    from OCR can neither shift the compared characters nor match a keyword
    string by accident. Each tile is reported at most once, in ascending
    index order.
    """
    wanted = [ch for ch in captcha_text if not ch.isspace()]
    return [
        '%s --- %s' % (index, keywords)
        for index, keywords in sorted(tile_keywords.items())
        if any(ch in keywords for ch in wanted)
    ]


def main():
    """Fetch a captcha, OCR its caption, look up each tile, print matches."""
    im = get_img()
    imgCut()
    captcha_text = ocrApi()
    print(captcha_text)

    # Tiles are numbered 1..8, row by row, over a 2-row by 4-column grid.
    dic_list = {}
    count = 0
    for y in range(2):
        for x in range(4):
            count += 1
            result = baidu_stu_lookup(get_sub_img(im, x, y))
            dic_list[count] = result
            print((y, x), result)

    if captcha_text.strip():
        print('\n可能的結果是:')
        for line in _match_candidates(captcha_text, dic_list):
            print(line)
    else:
        print('False')


if __name__ == '__main__':
    main()

<span style="font-family: Arial, Helvetica, sans-serif;">改自 https://gist.github.com/Evi1m0/fbbdb1ba7c66cc4e1bb2</span>
<h2 style="font-family: 'Microsoft Yahei', sans-serif; margin: 0px; padding: 0px; line-height: 26px;">轉載請註明作者與出處:<a target="_blank" href="http://blog.csdn.net/u013511642" style="color: rgb(202, 0, 0); text-decoration: none;">http://blog.csdn.net/u013511642</a>   王小濤_同學</h2>

   

Python3.4 12306 2015年3月驗證碼識別