遇到了一些坑:mechanize 不支持执行 JavaScript,如果遇到了像
<button id="submit" type="button" onclick="sign(this,'signin')" class="btn btn-banner mt10">提 交</button>
这样依赖 JS 事件(onclick)提交的按钮,就怎么都通不过……要是有人知道怎么弄,欢迎告诉我。
起因是想薅 Packt(packtpub.com)上的羊毛(免费电子书),于是查了一下相关脚本,发现了 mechanize 这个包,它主要用来模拟浏览器进行操作。
脚本如下
from mechanize import Browser
from bs4 import BeautifulSoup
import re
def login(br, url):
    """Open *url* and submit a login form filled with the stored credentials.

    Args:
        br: Browser-like object (a ``mechanize.Browser`` in this script).
        url: Address of the page that contains the login form.

    Returns:
        None. The browser is left on whatever page the submission leads to.
    """
    br.open(url)
    # nr=1 selects the second form on the page; presumably that is the
    # login form on the target site -- verify against the live page.
    br.select_form(nr=1)
    # Debug aid: dump every form so the chosen index can be checked by eye.
    for form in br.forms():
        print(form)
    # Placeholder credentials; replace with a real account before running.
    br.form["email"] = "xxxxgmail.com"
    br.form["password"] = "xxxx"
    br.submit()
def browse(br, url):
    """Open *url* and return the text of its ``<div class="dotd-title">``.

    Args:
        br: Browser-like object whose ``open(url)`` returns a response with
            a ``read()`` method (a ``mechanize.Browser`` in this script).
        url: Address of the page to fetch.

    Returns:
        The text content of the first ``div`` with class ``dotd-title``
        (not stripped; the caller strips it).

    Raises:
        ValueError: If the page contains no ``div`` with class ``dotd-title``.
    """
    page = br.open(url)
    soup = BeautifulSoup(page.read(), "html5lib")
    title_div = soup.find("div", class_="dotd-title")
    if title_div is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError('no <div class="dotd-title"> found at %s' % url)
    title = title_div.get_text()
    # Debug aid: prints the "account-bar-logged-in" element (or None when it
    # is absent), presumably to confirm that the session is logged in.
    print(soup.find("div", id="account-bar-logged-in"))
    return title
def click(br):
    """Follow the first link on the current page whose URL matches
    ``freelearning-claim``.

    Prints every link, the request built for the matching link, the controls
    of the currently selected form (if any), and finally the title and URL
    of the page reached.

    Args:
        br: Browser-like object (a ``mechanize.Browser`` in this script)
            positioned on the page that carries the claim link.

    Returns:
        None.
    """
    # Debug aid: list all links so the claim link can be identified.
    for link in br.links():
        print("%s %s" % (link.url, link.text))
    req = br.click_link(url_regex=re.compile("freelearning-claim"))
    print("req:  %s" % (req,))
    # mechanize resets ``br.form`` to None after every open(), so no form is
    # selected unless the caller chose one; skip the dump instead of crashing
    # on ``None.controls``.
    if br.form is not None:
        for control in br.form.controls:
            print(control)
            print("type=%s, name=%s value=%s"
                  % (control.type, control.name, br[control.name]))
    br.open(req)
    print("%s %s" % (br.title(), br.geturl()))
# Driver: log in, read today's free-book title, then follow the claim link.
br = Browser()
br.set_handle_redirect(True)
success = True
try:
    login(br, "http://www.packtpub.com")
    print("LOGIN")
    book_title = browse(br, "http://www.packtpub.com/packt/offers/free-learning")
    print(book_title)
    click(br)
except Exception as e:
    # Top-level boundary: any failure in the steps above is turned into a
    # reported outcome rather than a traceback.
    success = False
    error_message = str(e)
if success:
    outcome = "Success: Grabbed the book " + book_title.strip() + " for free!"
else:
    outcome = "Failure: " + error_message
# Report the result; previously the outcome was computed but never shown.
print(outcome)
然后自己也写了一些玩一玩的脚本(还打算用这个抢票……)。
主要是对一些表单的操作,但是现在不用 JS 的页面应该很少了……
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys,mechanize
import re
# Demo script: open the URL given as the first command-line argument, dump
# its forms, links and form controls, then follow the link whose text is
# '机因' and print the title of the page reached.

# Python 2 only: make UTF-8 the implicit str/unicode codec, presumably so the
# non-ASCII link-text comparison below works. Neither call exists on Python 3.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

if len(sys.argv) < 2:
    sys.exit("usage: %s URL" % sys.argv[0])

# Browser
br = mechanize.Browser()

# Options
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)
# Follow "refresh 0" but do not hang on refresh > 0.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

# Debugging switches (disabled):
# br.set_debug_http(True)
# br.set_debug_redirects(True)
# br.set_debug_responses(True)

# User-Agent: imitate a regular browser.
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

# Open the page.
r = br.open(sys.argv[1])
# print(br.geturl())

# Select a form.
# br.select_form(class_="s_form")
# List all forms on the page, then select the first one.
for f in br.forms():
    print(f)
br.select_form(nr=0)

# Extra inspection output (disabled; ``html`` is not defined in this script):
# print("html:\n", html)
# print("Response:\n", br.response().read())
# print("Title:\n", br.title())
# print("Info:\n", r.info())

# Pick out the link of interest.
for link in br.links():
    if link.text == "机因":
        print("%s: %s %s %s" % (link.url, link.text, link.attrs, link.tag))

# Show every control of the selected form.
for control in br.form.controls:
    print(control)
    print("type=%s, name=%s value=%s"
          % (control.type, control.name, br[control.name]))

# Follow the link (click_link raises if no link has this text).
new_link = br.click_link(text='机因')
br.open(new_link)
print(br.title())