实现12306登录,获取登录cookies
python + selenium + 超级鹰
# @author: zly
# @function: Touch verification code
# @time: 2020-09-15
# @copyright: All Rights Reserved
import random
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

from chaojiying import Chaojiying_Client
from constants import *
class MakeTrack:
    """
    Sliding-track generator.

    Builds a list of horizontal offsets whose sum equals ``distance``; the
    offsets are meant to be replayed one by one as mouse moves.

    :param distance: total distance to slide; defaults to ``DISTANCE``.
    """

    def __init__(self, distance=DISTANCE):
        self.distance = distance

    def segmentate(self, s):
        """
        Split one displacement into smaller pieces.

        * ``abs(s) < SEGMENTNUM1``: kept as a single piece.
        * ``SEGMENTNUM1 <= abs(s) < SEGMENTNUM2``: two pieces around ``s / 3``.
        * ``abs(s) >= SEGMENTNUM2``: five pieces around ``s / 5``.

        The pieces are not guaranteed to sum to ``s`` (the two-piece split
        sums to ``2 * round(s / 3)``); ``make_track`` compensates afterwards.

        :param s: displacement to split (number).
        :return: list of integer pieces.
        """
        magnitude = abs(s)
        if SEGMENTNUM1 <= magnitude < SEGMENTNUM2:
            third = round(s / 3)
            return [third - 3, third + 3]
        if magnitude >= SEGMENTNUM2:
            fifth = round(s / 5)
            return [fifth - 5, fifth - 3, fifth, fifth + 3, fifth + 5]
        return [round(s)]

    def make_track(self):
        """
        Build a sliding track that imitates an accelerating human drag.

        :return: list of offsets whose sum equals ``self.distance`` and whose
            length does not exceed ``TRACKMAXLENGTH``.
        """
        track = []
        current = v0 = 0
        while self.distance > current:
            # Random time step and random acceleration give a random displacement.
            t = random.randint(1, 4) / 2
            a = random.randint(1, 3)
            # Update the velocity, then compute the displacement of this step.
            v0 += a * t
            s = v0 * t + 0.5 * a * t ** 2
            # Large displacements are split into several smaller moves.
            track.extend(self.segmentate(round(s)))
            current += s

        # Compensate for overshoot or undershoot so the track sums exactly to
        # the target distance.
        while True:
            total = sum(track)
            if total == self.distance:
                break
            if total > self.distance and track:
                track.pop()
            else:
                # Also reached with an empty track (e.g. non-positive
                # distance), where popping would raise IndexError.
                track.append(self.distance - total)

        if len(track) > TRACKMAXLENGTH:
            # Too many pieces: generate a fresh track and return *that* one
            # (the retry result used to be thrown away).
            return self.make_track()
        return track
class Login12306(Chaojiying_Client):
    """
    Log in to 12306 with Selenium, using Chaojiying to solve the click captcha.

    :param username: 12306 account (str)
    :param password: 12306 password (str)
    :param cusername: Chaojiying account (str)
    :param cpassword: Chaojiying password (str)
    :param soft_id: Chaojiying software ID (str)
    :param codetype: captcha type passed to Chaojiying (int)
    :param path: file path where the captcha screenshot is stored (str)

    The settings are resolved in this order:

    1. a ``configs`` dict passed as keyword argument (if non-empty),
    2. the module constants (used when ``USERNAME`` is truthy),
    3. the explicit constructor arguments.

    Constructing an instance immediately starts the login flow (see ``run``).
    After a successful login the browser cookies are kept in ``self.cookies``.
    """

    def __init__(
        self, username=None, password=None,
        cusername=None, cpassword=None, soft_id=None,
        codetype=None, path=None,
        *args, **kwargs
    ):
        # No string default here: a truthy default would select this branch
        # even when no configs dict was supplied.
        configs = kwargs.get('configs')
        if configs:
            username = configs.get('username', '')
            password = configs.get('password', '')
            cusername = configs.get('cusername', '')
            cpassword = configs.get('cpassword', '')
            soft_id = configs.get('soft_id', '')
            codetype = configs.get('codetype', '')
            path = configs.get('path', '')
        elif USERNAME:
            username = USERNAME
            password = PASSWORD
            cusername = CUSERNAME
            cpassword = CPASSWORD
            soft_id = SOFTID
            codetype = CODETIPE
            path = PATH

        # Initialise the Chaojiying client for every configuration source so
        # that base_params / headers always exist before PostPic is called.
        super().__init__(
            username=cusername,
            password=cpassword,
            soft_id=soft_id
        )
        # The parent stored the Chaojiying credentials in username/password
        # (already copied into base_params); from here on they hold the
        # 12306 credentials.
        self.username = username
        self.password = password
        self.cusername = cusername
        self.cpassword = cpassword
        self.soft_id = soft_id
        self.codetype = codetype
        self.path = path
        # Filled with the browser cookies once the login succeeded.
        self.cookies = None
        # ``run`` is a property: merely accessing it executes the login flow.
        self.run

    @property
    def run(self):
        """
        Execute the complete login flow.

        The individual steps can also be called one by one instead.

        :return: True when the login verification succeeded, False otherwise.
        """
        self.driver = self.prepares()
        self.driver.get('https://kyfw.12306.cn/otn/resources/login.html')
        self.driver.implicitly_wait(IMPLICITLYWAIT)
        self.driver.maximize_window()
        time.sleep(1)
        # 1. enter account and password, 2. capture the captcha image
        self._fill_form_and_capture()
        while True:
            # 3. recognise the image and obtain the click coordinates
            #    (one retry when recognition fails)
            position, pic_id = self.get_position(codetype=self.codetype)
            if not position:
                position, pic_id = self.get_position(codetype=self.codetype)
            if not position:
                # Recognition failed twice: give up instead of clicking on
                # ``None`` coordinates.
                self.driver.quit()
                return False
            # 4. click the captcha
            self.img_click(position)
            # 5. log in; on captcha failure reload the page and start over
            if not self.login(pic_id):
                self.driver.refresh()
                self._fill_form_and_capture()
                continue
            # 6. drag the slider
            return bool(self.slide())

    def _fill_form_and_capture(self):
        """Enter the 12306 credentials and take a captcha screenshot."""
        self.input_user_pwd(username=self.username, password=self.password)
        self.get_pic()

    def prepares(self):
        """
        Create a Chrome driver with the usual automation markers removed.

        :return: the configured webdriver instance.
        """
        # Even after the image captcha is solved the login can still be
        # rejected because the site detects webdriver. A Selenium-driven
        # Chrome normally shows the "controlled by automated test software"
        # banner and exposes navigator.webdriver; a regular browser reports
        # ``undefined`` there. Both markers are therefore adjusted.
        options = webdriver.ChromeOptions()
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        driver = webdriver.Chrome(options=options)
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": "Object.defineProperty("
                          "navigator, 'webdriver', "
                          "{get: () => undefined})"
            }
        )
        return driver

    def input_user_pwd(self, username=None, password=None):
        """
        Switch to account login and type the 12306 credentials.

        :param username: 12306 account (str), default None
        :param password: 12306 password (str), default None
        :return: always 0; the value carries no meaning.
        """
        # Switch to the account/password login tab.
        self.driver.find_element(
            By.XPATH, '//li[@class="login-hd-account"]/a'
        ).click()
        # Wait a moment, otherwise the following lookups fail because the
        # page's JavaScript has not finished loading yet.
        time.sleep(2)
        self.driver.find_element(By.ID, 'J-userName').send_keys(username)
        self.driver.find_element(By.ID, 'J-password').send_keys(password)
        return 0

    def get_pic(self):
        """
        Save a screenshot of the click-captcha image to ``self.path``.

        :return: always 0; the value carries no meaning.
        """
        self.driver.find_element(By.ID, 'J-loginImg').screenshot(self.path)
        return 0

    def get_position(self, codetype=None):
        """
        Send the captcha screenshot to Chaojiying and parse the coordinates.

        :param codetype: captcha type (int), default None
        :return: ``[position, pic_id]`` where ``position`` is a nested list
            such as ``[['55', '55'], ['88', '88']]``, or None when the
            service reported an error.
        """
        verify_data = self.PostPic(self.path, codetype)
        print(verify_data)
        if verify_data['err_no'] == 0:
            # pic_str looks like "x1,y1|x2,y2|..."
            points = verify_data['pic_str'].split('|')
            position = [point.split(',') for point in points]
            return [position, verify_data['pic_id']]
        self.ReportError(verify_data['pic_id'])
        return [None, verify_data['pic_id']]

    def img_click(self, position):
        """
        Click the captcha image at every given coordinate.

        :param position: nested list of coordinates,
            e.g. ``[['55', '55'], ['88', '88']]``
        :return: always 0; the value carries no meaning.
        """
        element = self.driver.find_element(By.ID, 'J-loginImg')
        for point in position:
            # move_to_element_with_offset only accepts integer offsets.
            x = int(point[0])
            y = int(point[1])
            # NOTE(review): the origin of the offset (top-left corner vs.
            # element centre) depends on the installed Selenium version -
            # confirm against the version in use.
            ActionChains(self.driver).move_to_element_with_offset(
                element, x, y
            ).click().perform()
        return 0

    def login(self, pic_id=None):
        """
        Press the login button and check whether the captcha was accepted.

        :param pic_id: Chaojiying picture id, reported back on failure.
        :return: True when the captcha was accepted, False otherwise.
        """
        self.driver.find_element(By.ID, 'J-login').click()
        # The captcha error element being visible means verification failed.
        verify_tag = self.driver.find_element(
            By.XPATH, '//*[@class="lgcode-error"]'
        )
        if verify_tag.is_displayed():
            # Report the wrong answer back to Chaojiying.
            self.ReportError(pic_id)
            print("图片验证失败,报错成功")
            return False
        print("图片验证成功")
        time.sleep(3)
        return True

    def slide(self):
        """
        Perform the slider verification and confirm the login.

        The browser is closed afterwards in every case.

        :return: True on success (cookies are stored in ``self.cookies``),
            False otherwise.
        """
        try:
            return self._drag_slider() and self._confirm_login()
        finally:
            time.sleep(10)
            self.driver.quit()

    def _drag_slider(self):
        """Drag the slider along a generated track; return False on error."""
        try:
            element = self.driver.find_element(By.ID, 'nc_1_n1z')
            track = MakeTrack().make_track()
            ActionChains(self.driver).click_and_hold(element).perform()
            for offset in track:
                ActionChains(self.driver).move_by_offset(offset, 0).perform()
            ActionChains(self.driver).release(element).perform()
            # How long this takes depends on the network speed.
            time.sleep(5)
        except Exception as e:
            # Typically "stale element reference": the page refreshed and the
            # element is no longer attached; this does not happen every time.
            print(str(e))
            return False
        return True

    def _confirm_login(self):
        """Click the confirmation button and store the cookies."""
        try:
            self.driver.find_element(
                By.XPATH, '//*[@class="btn btn-primary ok"]'
            ).click()
            self.cookies = self.driver.get_cookies()
            print("恭喜您登陆成功")
            print(self.cookies)
            return True
        except Exception as e:
            print(str(e))
            print("恭喜您登陆失败,再来一次吧")
            return False
# Login settings handed to Login12306; the values here are placeholders.
configs = dict(
    username='',    # 12306 account
    password='',    # 12306 password
    cusername='',   # Chaojiying account
    cpassword='',   # Chaojiying password
    soft_id='',     # Chaojiying software ID
    codetype=9004,  # captcha type
    path='',        # path of the captcha screenshot
)
# Creating the instance starts the login flow right away.
Login12306(configs=configs)
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """HTTP client for the Chaojiying captcha-recognition service."""

    def __init__(self, username, password, soft_id):
        """
        Store the credentials and prepare the shared request data.

        :param username: Chaojiying account name
        :param password: plain-text password; only its MD5 hex digest is kept
        :param soft_id: software ID sent with every request
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        # Form fields that accompany every request.
        self.base_params = dict(
            user=self.username,
            pass2=self.password,
            softid=self.soft_id,
        )
        self.headers = dict([
            ('Connection', 'Keep-Alive'),
            ('User-Agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)'),
        ])

    def PostPic(self, path, codetype):
        """
        Upload an image for recognition.

        :param path: path of the image file
        :param codetype: captcha type, see http://www.chaojiying.com/price.html
        :return: the decoded JSON response
        """
        with open(path, 'rb') as image_file:
            image_bytes = image_file.read()
        form = {'codetype': codetype, **self.base_params}
        upload = {'userfile': ('ccc.jpg', image_bytes)}
        response = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=form,
            files=upload,
            headers=self.headers,
        )
        return response.json()

    def ReportError(self, im_id):
        """
        Report a wrongly recognised image.

        :param im_id: picture ID of the captcha being reported
        :return: the decoded JSON response
        """
        form = {'id': im_id, **self.base_params}
        response = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=form,
            headers=self.headers,
        )
        return response.json()
# --- 12306 credentials ---
USERNAME = ""  # 12306 account
PASSWORD = ""  # 12306 password

# --- Chaojiying credentials ---
CUSERNAME = ""  # Chaojiying account
CPASSWORD = ""  # Chaojiying password
SOFTID = ""     # Chaojiying software ID

# Captcha type (the spelling of the name is what the login code references).
CODETIPE = ""
# Path where the captcha screenshot is stored.
PATH = ""

# --- Slider track ---
# Distance the slider is dragged, in px.
DISTANCE = 425
# Thresholds used when splitting a displacement into smaller pieces.
SEGMENTNUM1 = 30
SEGMENTNUM2 = 50
# Maximum number of pieces allowed in a track.
TRACKMAXLENGTH = 30

# Implicit wait of the webdriver, in seconds.
IMPLICITLYWAIT = 10