当前位置: 首页 > 工具软件 > Douban CODE > 使用案例 >

豆瓣电影页面html代码,豆瓣电影信息查询(示例代码)

宰父玄天
2023-12-01

import re
from urllib.parse import quote

import bs4
import jieba
import requests
import wordcloud
from lxml.html import etree
from matplotlib import pyplot
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


class Movie():
    """Search Douban for a movie, print its details and draw a review word cloud.

    Creating an instance launches a headless Chrome session; ``get_search``
    uses that session once and shuts it down afterwards.
    """

    # Maximum number of search hits offered to the user.
    MAX_RESULTS = 10
    # Number of review pages fetched for the word cloud.
    REVIEW_PAGES = 5
    # NOTE(review): assumes Douban lists 20 reviews per page, so ``start`` is a
    # multiple of 20 -- confirm against the live site.
    REVIEWS_PER_PAGE = 20
    # Seconds to wait for any single HTTP request.
    REQUEST_TIMEOUT = 10

    def __init__(self, name):
        """Prepare the search URL for ``name`` and start headless Chrome.

        Args:
            name: Movie title typed by the user; it is URL-encoded before being
                placed in the query string.
        """
        self.url = f'https://search.douban.com/movie/subject_search?search_text={quote(name)}'
        # Chrome command-line switch that overrides the browser's user agent.
        self.headers = ('--user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                        'AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/78.0.3904.108 Safari/537.36')
        self.chrome_options = webdriver.ChromeOptions()
        self.chrome_options.add_argument('--headless')
        self.chrome_options.add_argument('--disable-gpu')
        self.chrome_options.add_argument(self.headers)
        # Selenium 4 API: the driver path goes through Service, options via ``options=``.
        self.browser = webdriver.Chrome(service=Service('chromedriver.exe'),
                                        options=self.chrome_options)
        self.wait = WebDriverWait(self.browser, 10)

    def get_search(self):
        """Load the search page and list the first results for the user to pick.

        Prints a numbered menu and returns the matching entries. The browser is
        always shut down before returning, so this method can be called only
        once per instance.

        Returns:
            A list of ``[name, url]`` pairs (at most ``MAX_RESULTS``); an empty
            list when no result link appeared before the wait timed out.
        """
        try:
            self.browser.get(self.url)
            links = self.wait.until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.title > a')))
            # Read everything from the live elements before the browser goes away.
            movies = [[link.text, link.get_attribute('href')]
                      for link in links[:self.MAX_RESULTS]]
        except TimeoutException:
            # ``wait.until`` raises instead of returning an empty result.
            print("没有搜到您要的信息,请重新输入")
            return []
        finally:
            # ``quit`` also terminates the chromedriver process, unlike ``close``.
            self.browser.quit()

        print('请选择:')
        for i, (name, _url) in enumerate(movies):
            print(f'[{i}].{name}')
        return movies

    def get_movie_info(self, movie):
        """Print the basic facts of one movie and show its review word cloud.

        Args:
            movie: A ``[name, url]`` pair as returned by ``get_search``.

        Returns:
            None. The method returns early, without output, when the detail
            page cannot be fetched or does not contain the info block.
        """
        name = movie[0]
        url = movie[1]
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36'}
        try:
            resp = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            return None
        if resp.status_code != 200:
            return None

        soup = bs4.BeautifulSoup(resp.text, 'html.parser')
        info = soup.find(name='div', attrs={'id': 'info'})
        if info is None:
            # Not a movie detail page (or the layout changed): nothing to show.
            return None
        print(info.text)

        # The rating block or its children may be missing; print what is there.
        rating = soup.find(name='div', attrs={'class': 'rating_self'})
        if rating is not None and rating.strong is not None:
            print(f'评分: {rating.strong.text}')
        if rating is not None and rating.a is not None:
            print(rating.a.text)

        try:
            text = self.get_reviews(url, headers)
        except requests.exceptions.RequestException:
            return None
        self.word_cloud(name, text)
        return None

    @staticmethod
    def get_reviews(url, headers):
        """Download several pages of reviews and return their text, whitespace-free.

        Args:
            url: URL of the movie detail page.
            headers: HTTP headers sent with every request.

        Returns:
            The concatenated text of all ``short-content`` nodes found, with
            all whitespace and the leftover ``()`` markers removed.

        Raises:
            requests.exceptions.RequestException: if a page cannot be fetched.
        """
        base = url.rstrip('/')
        chunks = []
        for page in range(Movie.REVIEW_PAGES):
            # Build each page URL from the unchanged base so suffixes do not pile up.
            page_url = f'{base}/reviews?start={page * Movie.REVIEWS_PER_PAGE}'
            response = requests.get(page_url, headers=headers,
                                    timeout=Movie.REQUEST_TIMEOUT)
            if not response.text.strip():
                continue
            html = etree.HTML(response.text)
            if html is None:
                continue
            fragments = html.xpath('//*[@class="short-content"]/text()')
            page_text = ''.join(''.join(fragments).split())
            # Only direct text nodes are selected, so an empty "()" can remain
            # where a child element sat between the parentheses.
            chunks.append(page_text.replace('()', ''))
        return ''.join(chunks)

    @staticmethod
    def word_cloud(name, word):
        """Segment ``word`` with jieba, save a word cloud as PNG and display it.

        Args:
            name: Movie title; used (after sanitizing) as the PNG file name.
            word: Review text to visualize.
        """
        # Drop characters that are illegal in file names, plus line breaks and
        # sentence punctuation. NOTE(review): the pattern was reconstructed from
        # a garbled source -- confirm the intended character set.
        name = re.sub(r'[\\/:*?"<>|\r\n。,,.??]+', '', name) or 'wordcloud'
        text = ' '.join(jieba.lcut(word))
        w = wordcloud.WordCloud(font_path='simkai.ttf', width=800, height=600,
                                background_color='white')
        try:
            w.generate(text)
        except ValueError:
            # wordcloud raises ValueError when the text yields no usable words.
            print('没有获取到影评,无法生成词云')
            return
        w.to_file(f'{name}.png')
        pyplot.imshow(w)
        pyplot.axis('off')
        pyplot.show()


def main():
    """Ask for a movie title, let the user pick a search hit and show its info."""
    movie_name = input("请输入电影名称,即可查询对应的影片信息:")
    m = Movie(movie_name)
    movies = m.get_search()
    if not movies:
        return
    num = input('请输入序号选择:')
    try:
        index = int(num)
    except ValueError:
        index = -1
    # Reject negative numbers too: they would silently index from the end.
    if not 0 <= index < len(movies):
        print('序号无效')
        return
    m.get_movie_info(movies[index])


if __name__ == '__main__':
    main()

 类似资料: