示例代码如下所示:
import random
import os
import requests
from lxml import etree
import time
from multiprocessing.dummy import Pool
# Category listing page that is scraped for video entries.
url = 'https://www.pearvideo.com/category_1'
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.52'
}
response = requests.get(url, headers=header, timeout=10).text
tree = etree.HTML(response)
li_list = tree.xpath('/html/body/div[2]/div[1]/div/ul/li')

# Filled with {'name': <file name>, 'video_url': <direct mp4 URL>} dicts
# that are later handed to download().
videos = []
for li in li_list:
    hrefs = li.xpath('.//a[@class="vervideo-lilink actplay"]/@href')
    titles = li.xpath('.//a[@class="vervideo-lilink actplay"]/div[2]/text()')
    if not hrefs or not titles:
        # List item without the expected link/title markup; nothing to fetch.
        continue
    href = hrefs[0]
    detail_url = 'https://www.pearvideo.com/' + href
    name = titles[0] + '.mp4'
    # The content id is the part of the href after the underscore.
    cont_id = href.split('_')[1]
    # mrd must be a random number; random.Random() would interpolate the
    # repr of a generator object into the query string instead.
    mrd = random.random()
    address = f'https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}&mrd={mrd}'
    ajax_header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.52',
        # The status request is sent with the detail page as Referer
        # (presumably required by the endpoint; verify against the site).
        'Referer': detail_url
    }
    content = requests.get(address, headers=ajax_header, timeout=10).json()
    src_url = content['videoInfo']['videos']['srcUrl']
    system_time = content['systemTime']
    # Placeholder URL (src_url):
    #   https://video.pearvideo.com/mp4/adshort/20190205/1667563305982-13557057_adpkg-ad_hd.mp4
    # Real URL (video_url):
    #   https://video.pearvideo.com/mp4/adshort/20190205/cont-1514702-13557057_adpkg-ad_hd.mp4
    # i.e. the system time embedded in src_url has to be swapped for
    # 'cont-<content id>' to obtain a downloadable address.
    video_url = src_url.replace(system_time, f'cont-{cont_id}')
    videos.append({'name': name, 'video_url': video_url})
def download(_dict):
    """Download a single video and store it in the ./videos directory.

    Args:
        _dict: Mapping with the keys 'name' (file name to save as) and
            'video_url' (direct URL of the video file).

    Returns:
        None. On a non-success HTTP status a message is printed and no
        file is written.
    """
    video_name = _dict['name']
    video_url = _dict['video_url']
    # The name is scraped from a web page; path separators in it would make
    # open() target a sub-directory that does not exist.
    file_name = video_name.replace('/', '_').replace('\\', '_')
    print(f'正在下载【{video_name}】')
    response = requests.get(video_url, headers=header, timeout=30)
    if not response.ok:
        # Do not save an HTTP error page to disk as if it were a video.
        print(f'【{video_name}】下载失败,HTTP状态码:{response.status_code}')
        return
    with open(f'./videos/{file_name}', 'wb') as wfile:
        wfile.write(response.content)
    print(f'【{video_name}】已下载完成')
if __name__ == '__main__':
    # Script entry point: download every collected video and report the
    # elapsed wall-clock time.
    start_time = time.time()
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./videos', exist_ok=True)
    # Single-threaded download (kept for comparison):
    # for video in videos:
    #     download(video)
    # Multi-threaded download: multiprocessing.dummy.Pool is a thread pool.
    pool = Pool(4)
    try:
        # map() blocks until every download() call has returned.
        pool.map(download, videos)
        end_time = time.time()
    finally:
        # Always release the worker threads, even if a download raised.
        pool.close()
        pool.join()
    print(f'视频全部下载完成,共耗时:{end_time - start_time}s')
本次示例代码爬取的是梨视频(Pear Video)“人物”分类下的视频。