#!/usr/bin/env python
# -*-coding:utf-8-*-
# date :2021/11/8 16:59
# author:Sabo
import argparse
from os import system
from selenium import webdriver
from bs4 import BeautifulSoup as BS
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
from os import listdir
from os import remove
from os import mkdir
# you-get -F flv -o F:/KeyKeyKiYoMi -O 揭秘!憨憨up在家如何学舞!.mp4 "https://www.bilibili.com/video/None"
class BilibiliUp(object):
    """Collect the video links of a Bilibili uploader and optionally download them.

    The workflow (see ``main``) is:

    1. search for the uploader and open the uploader's video list page,
    2. build one URL per page of that list and fetch every page source,
    3. extract video ids/titles from each page and turn ids into links,
    4. optionally append the links to a text file and/or download the
       videos with the external ``you-get`` command.

    Behaviour is controlled by the boolean flags set through the
    ``set_*`` / ``clear_*`` methods.
    """

    def __init__(self):
        super(BilibiliUp, self).__init__()
        # Run Chrome without a visible window when True.
        self.headless_flag = True
        # Page source of the uploader's video list (first page).
        self.home_page_suorce = ""
        # Prefix that video ids are appended to in ``cat_links``.
        self.root_vedio_head = "https://www.bilibili.com/video/"
        # Ids, links and titles extracted from the page currently processed.
        self.vedio_seq_numbers = []
        self.vedio_links = []
        self.vedio_titles = []
        # URL of the uploader's video list page.
        self.current_home_page = ""
        # Number of pages in the uploader's video list.
        self.count = 0
        # One URL per page of the video list.
        self.root_vedio_links = []
        # Page sources fetched for ``root_vedio_links``.
        self.home_pages = []
        # Download videos / write links to file / verbose single-page mode.
        self.download_video_flag = False
        self.write_links_flag = False
        self.debug = False

    def set_debug(self):
        """Enable debug mode (extra printing, only the first page is fetched)."""
        self.debug = True

    def clear_debug(self):
        """Disable debug mode."""
        self.debug = False

    def set_download_video_flag(self):
        """Enable downloading of the collected videos."""
        self.download_video_flag = True

    def clear_download_video_flag(self):
        """Disable downloading of the collected videos."""
        self.download_video_flag = False

    def set_write_links_flag(self):
        """Enable writing the collected links to a text file."""
        self.write_links_flag = True

    def clear_write_links_flag(self):
        """Disable writing the collected links to a text file."""
        self.write_links_flag = False

    def set_headless_flag(self):
        """Run Chrome in headless mode."""
        self.headless_flag = True

    def clear_headless_flag(self):
        """Run Chrome with a visible window."""
        self.headless_flag = False

    @staticmethod
    def check_dir_exist(save_path, video_name):
        """Make sure the folder ``video_name`` exists inside ``save_path``.

        :param save_path: existing parent directory.
        :param video_name: name of the sub-folder to look for.
        :return: ``True`` if the folder already existed, ``False`` if it
            had to be created.
        """
        if video_name in listdir(save_path):
            print("《%s》已经存在!无需创建~" % (save_path + video_name))
            return True
        print("《%s》不存在!稍后将创建~" % (save_path + video_name))
        mkdir(save_path + "/" + video_name)
        return False

    def _new_driver(self):
        """Create a Chrome driver, headless when ``headless_flag`` is set."""
        if self.headless_flag:
            chrome_options = Options()
            chrome_options.add_argument('--headless')
            return webdriver.Chrome(options=chrome_options)
        return webdriver.Chrome()

    def get_up_vedio_links(self, up_name):
        """Search for ``up_name`` and open that uploader's video list page.

        :param up_name: text typed into the Bilibili search box.
        :return: tuple ``(page_source, current_url, count)`` of the page
            reached, where ``count`` is the page count reported by
            ``get_acount_of_home_page``.
        """
        # Function-scope import keeps the file's import block untouched.
        from selenium.common.exceptions import WebDriverException

        root_url = "https://search.bilibili.com/"
        driver = self._new_driver()
        try:
            driver.get(root_url)
            driver.implicitly_wait(3)
            driver.maximize_window()
            driver.find_element(by=By.CLASS_NAME, value="search-input-el").send_keys(up_name, Keys.ENTER)
            # Switch to the "users" tab of the search results.
            sleep(1)
            driver.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[2]/div[1]/div[2]/div/nav/ul/li[8]/span').click()
            # Open the first user in the result list.
            sleep(1)
            driver.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[2]/div[2]/div/div/div[2]/div/div/div/div/h2/a').click()
            sleep(1)
            # The user page opens in a new window; continue there.
            driver = self.switchToNowWindow(driver)
            # Click "more" to reach the full video list. A failure here is
            # reported but not fatal: the current page is used as-is.
            try:
                driver.find_element(by=By.CLASS_NAME, value='more').click()
            except WebDriverException:
                print("有代表作元素点击失败,该up主没有设置代表作视频")
            driver = self.switchToNowWindow(driver)
            sleep(1)
            page_source = driver.page_source
            current_url = driver.current_url
            count = self.get_acount_of_home_page(page_source)
        finally:
            # quit() ends the whole session (all windows and the driver
            # process); close() would only close the current window.
            driver.quit()
        return page_source, current_url, count

    def get_all_page_suorce(self, home_vedio_links):
        """Fetch the page source of every URL in ``home_vedio_links``.

        A fresh browser session is used per URL. In debug mode only the
        first URL is fetched.

        :param home_vedio_links: iterable of page URLs.
        :return: list of page sources in the same order.
        """
        home_pages = []
        for link in home_vedio_links:
            driver = self._new_driver()
            try:
                driver.get(link)
                sleep(1)
                driver.implicitly_wait(3)
                home_pages.append(driver.page_source)
            finally:
                driver.quit()
            if self.debug:
                break
        return home_pages

    def get_acount_of_home_page(self, home_page):
        """Return the number of pages announced in ``home_page``.

        The value is the second space-separated token of the text of the
        ``<span class="be-pager-total">`` element. If that element is
        missing or its text cannot be parsed, ``1`` is returned.

        :param home_page: HTML source of a video list page.
        :return: page count as ``int``.
        """
        main_page = BS(home_page, "html.parser")
        child_page = main_page.find("span", attrs={"class": "be-pager-total"})
        try:
            return int(child_page.get_text().split(" ")[1])
        except (AttributeError, IndexError, ValueError):
            # No pager element (find() returned None) or unexpected text.
            print("作品不足一页")
            return 1

    def get_all_up_vedio_links(self, current_url):
        """Build one URL per page of the uploader's video list.

        Page 1 is ``current_url`` itself; pages ``2..self.count`` get the
        paging query string appended.

        :param current_url: URL of the first page of the video list.
        :return: list of page URLs (always contains ``current_url``).
        """
        print("该up总共有 %d 页视频" % self.count)
        vedio_links = [current_url]
        print("第 %d 页信息获取完毕~" % 1)
        for page in range(2, self.count + 1):
            vedio_links.append(current_url + "?tid=0&page=" + str(page) + "&keyword=&order=pubdate")
            print("第 %d 页信息获取完毕~" % page)
        return vedio_links

    def switchToNowWindow(self, driver):
        """Switch ``driver`` to its most recently listed window and return it."""
        window_handles = driver.window_handles
        driver.switch_to.window(window_handles[-1])
        return driver

    def get_index_of_vedio(self, page_suorce):
        """Extract video ids and titles from a video list page.

        Every ``<a class="cover">`` element contributes the last path
        segment of its ``href`` as id and the ``alt`` text of the next
        ``<img>`` as title. Only the first half of the matches is used
        because the page lists each entry twice.

        :param page_suorce: HTML source of a video list page.
        :return: tuple ``(video_index_list, video_title_list)`` of equal
            length.
        """
        video_index_list = []
        video_title_list = []
        main_page = BS(page_suorce, "html.parser")
        covers = main_page.find_all("a", class_="cover")
        # The matches are duplicated; keep the first half only.
        covers = covers[:len(covers) // 2]
        for index, cover in enumerate(covers):
            href = cover.get("href")
            if not href:
                # Without an href there is no id; str(None) would yield
                # a bogus ".../video/None" link.
                continue
            video_id = str(href).rstrip("/").split("/")[-1]
            title = cover.find_next("img").get("alt")
            if self.debug:
                print(index)
                print(cover)
                print("index", video_id)
                print("title", title)
                print("\n")
            video_index_list.append(video_id)
            video_title_list.append(title)
        return video_index_list, video_title_list

    def cat_links(self, vedio_seq_numbers):
        """Prefix every video id with ``self.root_vedio_head``.

        :param vedio_seq_numbers: iterable of video ids.
        :return: list of full video URLs.
        """
        return [self.root_vedio_head + str(seq) for seq in vedio_seq_numbers]

    def write_links_in_file(self, vedio_links, file_name, vedio_titles):
        """Append title/link pairs to ``file_name + ".txt"``.

        Entries whose ``"Title : <title>"`` line is already present in
        the file are skipped, so repeated calls do not duplicate or
        discard earlier entries. The file is created if it is missing.

        :param vedio_links: video URLs.
        :param file_name: target path without the ``.txt`` extension.
        :param vedio_titles: titles matching ``vedio_links`` by position.
        """
        txt_path = file_name + ".txt"
        try:
            with open(file=txt_path, mode="r", encoding="utf-8") as f:
                known_lines = set(f.readlines())
        except FileNotFoundError:
            known_lines = set()
        with open(file=txt_path, mode="a", encoding="utf-8") as f:
            for title, link in zip(vedio_titles, vedio_links):
                title_line = "Title : " + title + "\n"
                if title_line in known_lines:
                    print("《" + title.strip().__str__() + "》已经存在!")
                    continue
                f.write(title_line)
                f.write("Link : ")
                f.write(link + "\n\n")
                known_lines.add(title_line)
                print("Title : %s" % title)
                print("Link : %s" % link)

    def download(self, savePath, videoName, videoUrl):
        """Download one video into ``savePath`` with the ``you-get`` command.

        :param savePath: output directory passed to ``you-get -o``.
        :param videoName: title, only printed for information.
        :param videoUrl: URL of the video to download.
        """
        # Function-scope import keeps the file's import block untouched.
        import subprocess

        # TODO: handle special characters at the end of the title if the
        # output file name is ever passed to you-get again.
        print("Video title is : %s" % videoName)
        commond = "you-get -o {} \"{}\"".format(
            savePath,
            videoUrl)
        print(commond)
        # Pass an argument list instead of a shell string so that paths
        # with spaces work and scraped values are never parsed by a shell.
        try:
            subprocess.run(["you-get", "-o", savePath, videoUrl])
        except OSError as err:
            # e.g. you-get is not installed; keep going with the next video.
            print("you-get could not be started: %s" % err)

    def downloadAll(self, savePath, videoLinks, videoTitle, keywords):
        """Download every video that passes the keyword and existence checks.

        A video is skipped when its title contains any of ``keywords``
        or when ``<sanitized title>.mp4`` is already listed in
        ``savePath``.

        :param savePath: directory to download into.
        :param videoLinks: video URLs.
        :param videoTitle: titles matching ``videoLinks`` by position.
        :param keywords: substrings that exclude a title from download.
        """
        existing = set(listdir(path=savePath))
        for link, title in zip(videoLinks, videoTitle):
            if any(keyword in title for keyword in keywords):
                print("%s 不符合下载条件!" % title)
                continue
            target_name = title.replace("/", "").replace(" ", "-").replace('★', "-") + ".mp4"
            if target_name in existing:
                print("{0}".format(target_name))
                print("%s 已经存在!无需重复下载" % title)
                continue
            print("downloading~~")
            self.download(savePath=savePath, videoName=title, videoUrl=link)

    def print_line(self):
        """Print a horizontal separator line."""
        print(
            "--------------------------------------------------------------------------------------------------------------------------------")

    def delete_useless_files(self, path):
        """Remove files ending in ``.xml`` or ``.ts`` from ``path``.

        :param path: directory to clean up (not recursive).
        """
        delete_type = (".xml", ".ts")
        for file in listdir(path):
            full_path = path + "/" + file
            # Match the extension only; a substring test would also hit
            # names such as "clip.ts.mp4".
            if file.endswith(delete_type):
                print("Delete {0}".format(full_path))
                remove(full_path)
            else:
                print("keep {0}".format(full_path))

    def get_up_names_from_commond(self):
        """Read uploader names from the ``--name`` command line option.

        :return: list of names obtained by splitting the option value on
            ``','``.
        """
        parser = argparse.ArgumentParser(
            description="input up's names (you want to download their or his video\n\n"
                        "use English ',' to split them")
        parser.add_argument('--name', default='音乐私藏馆', help='input up\'s names')
        args = parser.parse_args()
        ans = str(args.name).split(',')
        print('up\'s names : {}'.format(ans))
        return ans

    def main(self, up_name, save_path):
        """Run the whole workflow for one uploader.

        :param up_name: uploader name to search for; also used as the
            base name of the links file.
        :param save_path: directory for the links file and downloads.
        """
        # self.current_home_page looks like https://space.bilibili.com/xxx/video
        self.home_page_suorce, self.current_home_page, self.count = self.get_up_vedio_links(up_name)
        self.root_vedio_links = self.get_all_up_vedio_links(current_url=self.current_home_page)
        # Fetch the page source of every page of the video list.
        self.home_pages = self.get_all_page_suorce(self.root_vedio_links)
        for index, home_page in enumerate(self.home_pages):
            # Extract ids and titles, then turn ids into full links.
            self.vedio_seq_numbers, self.vedio_titles = self.get_index_of_vedio(page_suorce=home_page)
            self.vedio_links = self.cat_links(vedio_seq_numbers=self.vedio_seq_numbers)
            self.print_line()
            print("index = {0}".format(index.__str__()))
            if self.write_links_flag:
                self.write_links_in_file(vedio_links=self.vedio_links,
                                         file_name=save_path + "/" + up_name.__str__(),
                                         vedio_titles=self.vedio_titles)
            if self.download_video_flag:
                self.downloadAll(savePath=save_path, videoLinks=self.vedio_links,
                                 videoTitle=self.vedio_titles, keywords=[])
            self.print_line()
        self.delete_useless_files(save_path)
if __name__ == '__main__':
    # Root folder that receives one sub-folder per uploader.
    check_dir = r"F:/b站up主们的视频/"
    bilibili_test = BilibiliUp()
    # Uploader names to process; they can alternatively be read from the
    # command line:
    # up_name = bilibili_test.get_up_names_from_commond()
    up_name = ["gaoming714"]
    # Show the browser window, write the links file and download videos.
    bilibili_test.clear_headless_flag()
    bilibili_test.set_download_video_flag()
    bilibili_test.set_write_links_flag()
    bilibili_test.clear_debug()
    for raw_name in up_name:
        # Use the same stripped name for the folder check and for the
        # save path so both always refer to the same directory.
        name = raw_name.__str__().strip()
        save_path = check_dir + name
        bilibili_test.check_dir_exist(save_path=check_dir, video_name=name)
        bilibili_test.main(name, save_path)