当前位置: 首页 > 工具软件 > mechanize > 使用案例 >

python mechanize使用

萧业
2023-12-01

遇到了一些坑,这个mechanize不支持js代码,如果遇到了

<button id="submit" type="button" onclick="sign(this,'signin')" class="btn btn-banner mt10">提 交</button>

这样的js代码怎么都通不过...要是有人知道怎么弄欢迎告诉我.


起因是要薅packtpub上的羊毛,然后查了一下脚本,发现了mechanize这个包,主要用来模拟浏览器进行操作

脚本如下

from mechanize import Browser
from bs4 import BeautifulSoup
import re

def login(br, url):
    """Open *url*, select the form at index 1 and submit the credentials.

    Every form found on the page is printed first as a debugging aid.

    Args:
        br: the browser object used for all requests.
        url: address of the page that carries the login form.
    """
    br.open(url)
    br.select_form(nr=1)
    # Dump all forms so the right index can be checked by eye.
    for form in br.forms():
        print(form)
    credentials = (
        ("email", "xxxxgmail.com"),
        ("password", "xxxx"),
    )
    for field, value in credentials:
        br.form[field] = value
    br.submit()
    
def browse(br, url):
    """Fetch *url* and return the text of its ``dotd-title`` div.

    The ``account-bar-logged-in`` div is printed as a debugging aid (it prints
    ``None`` when the page has no such element).

    Args:
        br: the browser object used for the request.
        url: address of the page to parse.

    Returns:
        The text content of the first ``<div class="dotd-title">``.
    """
    soup = BeautifulSoup(br.open(url).read(), "html5lib")
    # Extract the title before printing, so a missing title div fails first.
    title = soup.find("div", class_="dotd-title").get_text()
    print(soup.find("div", id="account-bar-logged-in"))
    return title
 
def click(br):
    """Follow the ``freelearning-claim`` link of the currently loaded page.

    Prints every link on the page, builds the request for the link whose URL
    matches ``freelearning-claim``, dumps the controls of the selected form
    (if any) and finally opens the request.

    Args:
        br: the browser object; it must already have a page loaded.

    Raises:
        Whatever ``br.click_link`` raises when no link matches the pattern.
    """
    for link in br.links():
        print("%s %s" % (link.url, link.text))
    req = br.click_link(url_regex=re.compile("freelearning-claim"))
    print("req:  %s" % (req,))

    # The form dump is debugging output only.  No form is selected once a new
    # page has been opened, so guard against ``None`` instead of crashing
    # before the claim request is ever sent.
    form = br.form
    if form is not None:
        for control in form.controls:
            print(control)
            # Read the value from the control itself: looking it up by name
            # on the browser fails for unnamed or duplicate-named controls.
            print("type=%s, name=%s value=%s"
                  % (control.type, control.name, control.value))

    br.open(req)
    print("%s %s" % (br.title(), br.geturl()))
    
    
# Driver: log in, read today's free book title, then claim it.
br = Browser()
br.set_handle_redirect(True)
success = True

try:
    login(br, "http://www.packtpub.com")
    print("LOGIN")
    book_title = browse(br, "http://www.packtpub.com/packt/offers/free-learning")
    print(book_title)
    click(br)
except Exception as e:
    # Top-level boundary: remember why the run failed so it can be reported.
    success = False
    error_message = str(e)

if success:
    outcome = "Success: Grabbed the book " + book_title.strip() + " for free!"
else:
    outcome = "Failure: " + error_message
# Always report the result; a failed run used to end without any output.
print(outcome)

然后自己写了些玩一玩的(还打算用这个抢票的...)

主要是对一些表单的操作,但是现在不用js的页面应该很少了...

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import sys,mechanize
import re
# NOTE(review): Python 2-only idiom -- `reload` is not a builtin on Python 3.
# Re-imports sys and then sets the interpreter-wide default string encoding
# to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Browser
br = mechanize.Browser()

# Options
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)

# Follows refresh 0 but does not hang on refresh > 0.
# The first argument is an on/off flag, so pass True rather than a processor
# instance taken from the private mechanize._http module.
br.set_handle_refresh(True, max_time=1)

# Debugging switches, disabled by default:
# br.set_debug_http(True)
# br.set_debug_redirects(True)
# br.set_debug_responses(True)

# User-Agent: mimic a real browser
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
# Open the page given as the first command-line argument
if len(sys.argv) < 2:
    # Fail with a usage hint instead of an IndexError traceback.
    sys.exit("usage: %s URL" % sys.argv[0])
r = br.open(sys.argv[1])
# print br.geturl()
# Select a form
# br.select_form(class_="s_form")
# List every form on the page
for f in br.forms():
    print(f)
br.select_form(nr=0)

# Disabled debugging output (note: `html` is not defined in this script):
# print "html:\n", html
# print "Response:\n", br.response().read()
# print "Title:\n",br.title()
# print "Info:\n",r.info()

# Text of the link to look for and follow
target_text = "机因"
# Pick out the matching links
for link in br.links():
    if link.text == target_text:
        print("%s: %s %s %s" % (link.url, link.text, link.attrs, link.tag))
# List every control of the selected form
for control in br.form.controls:
    print(control)
    # Read the value from the control itself: looking it up by name on the
    # browser fails for unnamed or duplicate-named controls.
    print("type=%s, name=%s value=%s" % (control.type, control.name, control.value))
# Follow the link
new_link = br.click_link(text=target_text)

br.open(new_link)
print(br.title())




 类似资料: