from bs4 import BeautifulSoup
import requests
from html.parser import HTMLParser
from PIL import Image
# Simulated login to Douban.
class MomNi():
    """Small Douban client that logs in (solving a captcha if asked) and posts.

    Every request goes through one ``requests`` session stored on the
    instance, so the state established by ``longin`` is reused by
    ``get_content``.
    """

    def __init__(self):
        # Python only calls ``__init__`` on construction; delegate to the
        # public ``init`` so ``MomNi()`` yields a ready-to-use instance.
        self.init()

    def init(self):
        """Set the default request headers and open a fresh HTTP session."""
        self.header = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/59.0.3071.115 Safari/537.36'
            ),
        }
        self.session = requests.session()

    def longin(self, form_email, form_password):
        """Log in to Douban with the given credentials.

        Fetches the login page; when it carries a captcha, the image is
        saved to ``captcha.jpg``, shown if possible, and the solution is read
        from stdin before the login form is submitted.

        Args:
            form_email: value sent as the ``form_email`` form field.
            form_password: value sent as the ``form_password`` form field.
        """
        login_url = 'https://accounts.douban.com/login'
        response = self.session.get(login_url)
        # Both values are None when the page contains no captcha.
        (captch_id, captch_url) = _get_captcha(response.content.decode('utf-8'))
        print(captch_id)

        # Form fields that are always sent.
        data = {
            'form_email': form_email,
            'form_password': form_password,
            'source': 'index_nav',
            'user_login': '登录',
            'redir': 'https://www.douban.com/',
        }

        if captch_id:
            # Download the captcha image so the user can look at it.
            r = self.session.get(captch_url)
            with open('captcha.jpg', 'wb') as f:
                f.write(r.content)
            try:
                # Must be the same file name that was just written.
                with Image.open('captcha.jpg') as img:
                    img.show()
            except Exception:
                # Showing the image is best effort (e.g. no display);
                # the URL is still printed in the prompt below.
                print('请输入验证码')
            captcha_solution = input('please input solution for captcha[{}:'.format(captch_url))
            # Extra form fields required only when a captcha was presented.
            data['captcha-id'] = captch_id
            data['captcha-solution'] = captcha_solution

        self.session.post(login_url, data=data, headers=self.header)
        # Debug aid: the logged-in home page can be inspected with
        # self.session.get('https://www.douban.com').content.decode('utf-8')

    def get_content(self, comment, ck='6_29'):
        """Post ``comment`` to the Douban home page.

        Args:
            comment: text sent as the ``comment`` form field.
            ck: value sent as the ``ck`` form field.
                NOTE(review): presumably a per-session token; the default is
                hard-coded -- confirm it is valid for the logged-in session.
        """
        data = {
            'ck': ck,
            'comment': comment,
        }
        self.session.post('https://www.douban.com/', data=data, headers=self.header)
#定义_attr方法,解析式调用
def _attr(attrs,attrname):
for attr in attrs:
if attr[0] == attrname:
return attr[1]
return None
def _get_captcha(content):
    """Extract the captcha id and captcha image URL from login-page HTML.

    Args:
        content: HTML text to scan.

    Returns:
        A ``(captch_id, captch_url)`` tuple. Each element is None when the
        corresponding tag was not found in ``content``.
    """
    class Captchaparser(HTMLParser):
        """HTMLParser subclass that remembers the captcha id and image URL."""

        def __init__(self):
            super().__init__()
            self.captch_id = None
            self.captch_url = None

        def handle_starttag(self, tag, attrs):
            # Captcha image: <img id="captcha_image" class="captcha_image" src=...>
            if (tag == 'img'
                    and _attr(attrs, 'id') == 'captcha_image'
                    and _attr(attrs, 'class') == 'captcha_image'):
                self.captch_url = _attr(attrs, 'src')
            # Hidden form field holding the id:
            # <input type="hidden" name="captcha-id" value=...>
            if (tag == 'input'
                    and _attr(attrs, 'type') == 'hidden'
                    and _attr(attrs, 'name') == 'captcha-id'):
                self.captch_id = _attr(attrs, 'value')

    p = Captchaparser()
    p.feed(content)
    # Flush anything the parser is still buffering before reading results.
    p.close()
    return p.captch_id, p.captch_url
if __name__ == '__main__':
    # Read the account credentials from stdin.
    form_email = input('plase your email:')
    form_password = input('plase your password:')

    # Create the client and log in with those credentials.
    client = MomNi()
    client.longin(form_email, form_password)

    # Read the text to publish and post it.
    comment = input('plase input comment:')
    client.get_content(comment)
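# Note: this code has no function for replying to an existing post, because a
# reply has to target that post's own ID. Every post has a unique ID, and
# different posts have different IDs.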