当前位置: 首页 > 工具软件 > Douyu > 使用案例 >

python爬虫实验报告_Python爬虫实验报告之Big_Homework2_Douyu

阳宗清
2023-12-01

import json
from urllib import request  # not referenced by the code in this listing; kept as in the original

import requests
from lxml import etree  # not referenced by the code in this listing; kept as in the original

# Module-level state shared by the functions below: the HTTP request headers
# and the output file object.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36 Edg/85.0.564.44',
}
# Opened (and truncated) when the module is loaded; printt() writes to it and
# the __main__ guard closes it.
file = open('./斗鱼.txt', 'w', encoding='utf-8')

# Fetch the listing pages
def index():
    """Download the Douyu directory listing pages and pass each one to clean().

    Requests pages 1 through 20 of the ``mixList/2_181`` endpoint using the
    module-level ``headers`` and forwards each response body, as text, to
    ``clean``. Progress is reported on stdout.

    Raises:
        requests.exceptions.RequestException: if a request fails or exceeds
            the timeout.
    """
    for num in range(1, 21):
        # The page number is the last path segment of the endpoint.
        base_url = 'https://www.douyu.com/gapi/rkc/directory/mixList/2_181/{}'.format(num)
        print('正在写入', base_url, '中的数据信息...')
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(base_url, headers=headers, timeout=10)
        # Decode the body as UTF-8 (the original spelled the codec 'uft-8').
        response.encoding = 'utf-8'
        jsons = response.text  # the JSON document as a str
        clean(jsons)

# Extract the room entries from one response
def clean(jsons):
    """Parse one listing response and hand its room entries to printt().

    Args:
        jsons: The response body as a JSON string. The decoded object must
            provide ``['data']['rl']``.

    Raises:
        json.JSONDecodeError: if ``jsons`` is not valid JSON.
        KeyError: if the decoded mapping lacks ``'data'`` or ``'rl'``.
    """
    # Turn the JSON text into Python objects.
    dicts = json.loads(jsons)
    # The room entries live under data -> rl.
    info_list = dicts['data']['rl']
    printt(info_list)

# Write the room entries to the output
def printt(info_list, out=None):
    """Write one formatted, newline-terminated line per room entry.

    Args:
        info_list: Iterable of mappings, each providing the keys ``'rn'``,
            ``'nn'``, ``'ol'`` and ``'c2name'``.
        out: Object with a ``write(str)`` method that receives the lines.
            Defaults to the module-level ``file``.

    Raises:
        KeyError: if an entry lacks one of the four keys.
    """
    if out is None:
        out = file
    for i in info_list:
        # NOTE(review): 'rn' is written under the label "房间号" (room number);
        # confirm against the API that this field is not the room title.
        room_number = i['rn']
        homeowner = i['nn']
        heat = i['ol']
        c2name = i['c2name']
        # Assemble the record; the f-string formats each value, so non-str
        # fields no longer break the concatenation.
        full_info = f'{c2name}房间号:{room_number}\t房主:{homeowner}\t热度:{heat}'
        out.write(full_info + '\n')

if __name__ == '__main__':
    # Crawl every page; close the shared output file even when a request or
    # a parsing step raises part-way through, so buffered lines are flushed.
    try:
        index()
    finally:
        file.close()

 类似资料: