import ssl
import json
from PIL import Image
import urllib
import re
import urllib.request as urllib2
# Swap ssl.create_default_context for ssl's non-verifying context factory
# when this interpreter provides one.
# NOTE(review): this affects every caller that builds its TLS context via
# ssl.create_default_context; confirm that the HTTPS request made by this
# script actually goes through that function and that skipping certificate
# verification is intended.
try:
    ssl.create_default_context = ssl._create_unverified_context
except AttributeError:
    # No _create_unverified_context on this interpreter; leave ssl untouched.
    pass

# Browser-like User-Agent header value sent with the stu.baidu.com requests.
UA = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/41.0.2272.89 Safari/537.36"
)

# URL downloaded by get_img().
pic_url = (
    "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew"
    "?module=login&rand=sjrand&0.21191171556711197"
)
def get_img():
    """Download the image at ``pic_url`` and return it opened with PIL.

    The downloaded bytes are written to ``./tmp.jpg`` (relative to the
    current working directory, overwriting any existing file) and that file
    is then opened with ``Image.open``.

    Returns:
        The object returned by ``Image.open('./tmp.jpg')``.
    """
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left to the garbage collector.
    with urllib2.urlopen(pic_url) as resp:
        raw = resp.read()
    with open('./tmp.jpg', 'wb') as fp:
        fp.write(raw)
    return Image.open('./tmp.jpg')
def get_sub_img(im, x, y):
    """Crop one grid cell out of *im*.

    Cells are 67 px squares placed every ``67 + 5 = 72`` px in both
    directions; cell (0, 0) has its top-left corner at pixel (5, 41).

    Args:
        im: Image-like object exposing ``crop((left, top, right, bottom))``.
        x: Column index, from 0 to 3 inclusive.
        y: Row index, from 0 to 2 inclusive.

    Returns:
        Whatever ``im.crop`` returns for the computed box.

    Raises:
        ValueError: If *x* or *y* is outside its allowed range.
    """
    # Explicit checks instead of ``assert`` so the validation still runs
    # when Python is started with -O (which strips assert statements).
    if not 0 <= x <= 3:
        raise ValueError("x must be between 0 and 3, got %r" % (x,))
    if not 0 <= y <= 2:
        raise ValueError("y must be between 0 and 2, got %r" % (y,))

    cell_size = 67
    step = cell_size + 5  # distance between the origins of adjacent cells
    left = 5 + step * x
    top = 41 + step * y
    return im.crop((left, top, left + cell_size, top + cell_size))
def baidu_stu_lookup(im):
    """Look *im* up on stu.baidu.com and return the extracted keywords.

    The image is saved to ``./query_temp_img.png`` (overwriting any existing
    file), its bytes are POSTed to the upload endpoint, and the body of that
    response is passed, URL-quoted, as ``queryImageUrl`` to the search
    endpoint. The resulting HTML is handed to ``baidu_stu_html_extract``.

    Args:
        im: Image-like object exposing ``save(path)``.

    Returns:
        The value returned by ``baidu_stu_html_extract`` for the search page.
    """
    # quote() is documented in urllib.parse; import it from there rather
    # than relying on it being reachable through urllib.request.
    from urllib.parse import quote

    upload_url = "http://stu.baidu.com/n/image?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
    im.save("./query_temp_img.png")
    # Context managers close the file and both HTTP responses promptly.
    with open("./query_temp_img.png", 'rb') as fp:
        raw = fp.read()
    # The upload endpoint takes the payload size as the trailing parameter.
    upload_url += str(len(raw))
    req = urllib2.Request(upload_url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        resp_url = resp.read()  # bytes; quote() accepts bytes directly

    search_url = "http://stu.baidu.com/n/searchpc?queryImageUrl=" + quote(resp_url)
    req = urllib2.Request(search_url, headers={'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        html = resp.read().decode()
    return baidu_stu_html_extract(html)
# Captures the single-quoted value that follows "keywords:" in the page.
_KEYWORDS_RE = re.compile(r"keywords:'(.*?)'")


def baidu_stu_html_extract(html):
    """Extract the keyword list embedded in a search-result page.

    The first ``keywords:'...'`` occurrence in *html* is taken, its
    ``\\x22`` sequences are turned into double quotes and doubled
    backslashes into single ones, and the result is parsed as JSON. The
    ``'keyword'`` entry of every item is collected.

    Args:
        html: Page source as a string.

    Returns:
        The keywords joined with ``'|'``, or the sentinel ``'[UNKOWN]'``
        (spelling kept as-is, since callers may compare against it) when
        there is no match, the payload is empty or not valid JSON, the
        items do not have the expected shape, or the list is empty.
    """
    match = _KEYWORDS_RE.search(html)
    if match is None:
        return '[UNKOWN]'

    payload = match.group(1).replace('\\x22', '"').replace('\\\\', '\\')
    try:
        keywords = [item['keyword'] for item in json.loads(payload)]
    except (ValueError, KeyError, TypeError):
        # ValueError covers json decoding failures (e.g. an empty payload);
        # KeyError/TypeError cover items that are not dicts with 'keyword'.
        return '[UNKOWN]'
    return '|'.join(keywords) if keywords else '[UNKOWN]'
if __name__ == '__main__':
    # Fetch the image once, then visit cells row by row (2 rows x 4 columns)
    # and print each cell's (row, column) next to its lookup result.
    image = get_img()
    for index in range(2 * 4):
        y, x = divmod(index, 4)
        cell = get_sub_img(image, x, y)
        print((y, x), baidu_stu_lookup(cell))
# Adapted from https://github.com/andelf/fuck12306/blob/master/fuck12306.py
# Works on Python 3.4.