糗事百科爬虫

申屠宗清
2023-12-01

代码如下（友情提示：每显示一条内容后按回车继续，输入 quit 即可结束运行）：

Qiushi.py


#!/usr/bin/env python
#encoding:utf-8
#By eathings

import re

try:
    import urllib2
except ImportError:  # Python 3 moved urlopen into urllib.request
    import urllib.request as urllib2

class Console_page:
    """Page through the qiushibaike.com "hot" listing in the console.

    Attributes set by ``__init__``:
        page: number of the next listing page to fetch (starts at 1).
        enable: loop flag; ``startread`` keeps running while it is true.
        qiushi: compiled pattern capturing ``(title attribute, inner HTML)``
            for each ``<div ... class="content" ... title="...">`` element.
    """

    def __init__(self):
        self.page = 1
        self.enable = True
        # Raw string: "\s" / "\S" are regex classes, not string escapes.
        self.qiushi = re.compile(
            r'<div.*?class="content".*?title="(.*?)">([\s\S]*?)</div>'
        )

    def _as_text(self, raw):
        """Return *raw* as text, decoding byte strings as UTF-8.

        NOTE(review): assumes the site serves UTF-8 -- confirm. Undecodable
        bytes are replaced rather than raising.
        """
        if isinstance(raw, bytes):
            return raw.decode("utf-8", "replace")
        return raw

    def _read_line(self):
        """Read one line from stdin on Python 2 or 3; EOF counts as 'quit'."""
        try:
            reader = raw_input  # Python 2
        except NameError:
            reader = input  # Python 3
        try:
            return reader()
        except EOFError:
            return 'quit'

    def getpage(self, page):
        """Fetch listing page *page* and return its matched posts.

        Returns a list of ``(title, content)`` text tuples, empty when
        nothing on the page matches ``self.qiushi``. Errors raised by
        ``urlopen`` propagate to the caller.
        """
        url = "http://www.qiushibaike.com/hot/page/" + str(page)
        response = urllib2.urlopen(url)
        try:
            result = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
        # Decode once at the I/O boundary so matching and printing work on
        # text under both Python 2 and Python 3.
        html = self._as_text(result).replace("<br/>", "")
        return self.qiushi.findall(html)

    def show_page(self, page):
        """Print the posts of *page* one at a time, waiting for Enter.

        Typing ``quit`` (or closing stdin) clears ``self.enable``. An empty
        page also clears it: no prompt would be shown, so ``startread``
        would otherwise keep requesting further pages forever.
        """
        posts = self.getpage(page)
        if not posts:
            self.enable = False
            return
        for title, content in posts:
            print(u"第 %d页 %s %s" % (
                page, self._as_text(title), self._as_text(content)))
            if self._read_line() == 'quit':
                self.enable = False
                break

    def startread(self):
        """Show consecutive pages, starting at ``self.page``, until disabled."""
        while self.enable:
            self.show_page(self.page)
            self.page += 1

# Start the interactive reader only when this file is run as a script, so
# importing the module does not trigger network and console I/O.
if __name__ == "__main__":
    qiushibaike = Console_page()
    qiushibaike.startread()

 类似资料: