说来也巧,闲来无事,在微信中水群正好看到某群友说想爬点LOL(英雄联盟)的比赛数据,索性就要来了网址链接。网址链接:PentaQ官网
作为LPL粉丝,当然是先拿LPL模块做模板,闲话不多说,直接进入主题。
项目整体思路:
1、通过观察发现网站数据是按照赛区/赛季/赛事活动和游戏版本划分模块,换句话说,爬到某个版本中某个赛区某个赛季就可同理抓取其他版本/赛区/赛季的数据
2、以2019 LPL Summer All Patches为例,进行Overview/Team Stats/Player Stats三个模块数据爬取
爬虫语言:Python,这里对于没有Python环境的读者,建议使用Anaconda(Anaconda下载地址 )
IDE:采用Anaconda自带的Spyder,读者可根据自己使用习惯选择;接下来就是安装本次需要的python库和包,由于作者采用spyder,因此使用Anaconda Prompt进行所需模块的下载
下载模块一般采用:pip install module_name
#python模块导入
import json
import pandas as pd
import time
import requests
import datetime
from sqlalchemy import create_engine
import pymysql
import random
from numpy import *
import re,urllib.request
# from celery_app import app #定时器
Player Stats模块:作者采用的是阿里云MySQL数据库存储,读者可根据自己的存储方式,调整相应代码
class LPL():
#选手数据
def lpl_player_stats(self):
tour = 59,
url = 'https://data.pentaq.com/business_api/2018may/tournament_player_duty_data?tour=59&patch='
patch = '9.6.1', #可细分版本获取
headers = {
'Cookie':'抓取你的cookie',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'Host': 'data.pentaq.com',
'Referer': 'https://data.pentaq.com/PlayerStats?tour=59',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
}
res = requests.get(url, headers)
# print(res)
js = json.loads(res.text)['data']['players_data']
#记得注释字段
player_name=[];team_name=[];team_full_name=[];psr=[];player_id=[];team_id=[];appear=[];win=[];win_rate=[];kill=[];dead=[];assist=[];kda=[];battle_rate_per_game=[];
solo_kill=[];solo_dead=[];ten_minutes_gold_offset_per_game=[];ten_minutes_creeps_per_game=[];
exp_diff_10m_per_game=[];damage_per_minute=[];damage_percent=[];damage_efficiency=[];tank_per_minute=[];tank_percent=[];
tank_efficiency=[];put_eye_per_minute=[];destroy_eye_per_minute=[];buy_true_eye_per_minute=[];duty_id=[];lose=[];insert_time=[]
for data in js:
# print(data['player_name'])
player_name.append(data['player_name'])
team_name.append(data['team_name'])
team_full_name.append(data['team_full_name'])
psr.append(data['psr'])
player_id.append(data['player_id'])
team_id.append(data['team_id'])
appear.append(data['appear'])
win.append(data['win'])
win_rate.append(data['win_rate'])
kill.append(data['kill'])
dead.append(data['dead'])
assist.append(data['assist'])
kda.append(data['kda'])
battle_rate_per_game.append(data['battle_rate_per_game'])
solo_kill.append(data['solo_kill'])
solo_dead.append(data['solo_dead'])
ten_minutes_gold_offset_per_game.append(data['ten_minutes_gold_offset_per_game'])
ten_minutes_creeps_per_game.append(data['ten_minutes_creeps_per_game'])
exp_diff_10m_per_game.append(data['exp_diff_10m_per_game'])
damage_per_minute.append(data['damage_per_minute'])
damage_percent.append(data['damage_percent'])
damage_efficiency.append(data['damage_efficiency'])
tank_per_minute.append(data['tank_per_minute'])
tank_percent.append(data['tank_percent'])
tank_efficiency.append(data['tank_efficiency'])
put_eye_per_minute.append(data['put_eye_per_minute'])
buy_true_eye_per_minute.append(data['buy_true_eye_per_minute'])
duty_id.append(data['duty_id'])
lose.append(data['lose'])
insert_time.append(datetime.datetime.now().strftime("%Y--%m--%d %H:%M:%S")) #由于工作要求,增加一个数据入库时间字段
# print(player_name,'\n')
dic={}
dic['player_name']=player_name
dic['team_name']=team_name
dic['team_full_name']=team_full_name
dic['psr']=psr
dic['player_id']=player_id
dic['team_id']=team_id
dic['appear']=appear
dic['win']=win
dic['win_rate']=win_rate
dic['kill']=kill
dic['dead']=dead
dic['assist']=assist
dic['kda']=kda
dic['battle_rate_per_game']=battle_rate_per_game
dic['solo_kill']=solo_kill
dic['solo_dead']=solo_dead
dic['ten_minutes_gold_offset_per_game']=ten_minutes_gold_offset_per_game
dic['ten_minutes_creeps_per_game']=ten_minutes_creeps_per_game
dic['exp_diff_10m_per_game']=exp_diff_10m_per_game
dic['damage_per_minute']=damage_per_minute
dic['damage_percent']=damage_percent
dic['damage_efficiency']=damage_efficiency
dic['tank_per_minute']=tank_per_minute
dic['tank_percent']=tank_percent
dic['tank_efficiency']=tank_efficiency
dic['put_eye_per_minute']=put_eye_per_minute
dic['buy_true_eye_per_minute']=buy_true_eye_per_minute
dic['duty_id']=duty_id
dic['lose']=lose
dic['insert_time']=insert_time
df=pd.DataFrame(dic)
pd.io.sql.to_sql(df, 'lpl_player_stats',create_engine('mysql+pymysql://'user_name':'DB_name'@'password'@'IP地址'/'DB_name'?charset=user_nameutf8mb4'),if_exists='append' ) #读者可使用自己的数据库链接信息
Overview 对于比赛记录中队伍的logo图片信息,MySQL数据库只能存储图片的链接信息,作者通过解析requests.get(url).content存储在本地
#战队ranking数据
def lpl_overview(self):
tour = 69,
url = 'https://data.pentaq.com/business_api/2018mar/tournament_situation?tour=69'
headers = {
'Cookie':'抓取你的cookie',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'Host': 'data.pentaq.com',
'Referer': 'https://data.pentaq.com/?tour=69',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
}
res = requests.get(url, headers)
# print(res.text)
js = json.loads(res.text)['situations'][0]['teams']
# print(js)
team_id=[];name=[];small_logo_url=[];match_win=[];match_lose=[];match_draw=[];match_count=[];game_win=[];game_lose=[];game_count=[];match_win_rate=[];
game_win_rate=[];rank=[];insert_time=[]
for i in js:
# print(i['team_id'],'\n')
team_id.append(i['team_id'])
name.append(i['name'])
small_logo_url.append(i['small_logo_url'])
match_win.append(i['match_win'])
match_lose.append(i['match_lose'])
match_draw.append(i['match_draw'])
match_count.append(i['match_count'])
game_win.append(i['game_win'])
game_lose.append(i['game_lose'])
game_count.append(i['game_count'])
match_win_rate.append(i['match_win_rate'])
game_win_rate.append(i['game_win_rate'])
rank.append(i['rank'])
insert_time.append(datetime.datetime.now().strftime("%Y--%m--%d %H:%M:%S"))
dic={}
dic['team_id']=team_id
dic['name']=name
dic['small_logo_url']=small_logo_url
dic['match_win']=match_win
dic['match_lose']=match_lose
dic['match_draw']=match_draw
dic['match_count']=match_count
dic['game_win']=game_win
dic['game_lose']=game_lose
dic['game_count']=game_count
dic['match_win_rate']=match_win_rate
dic['game_win_rate']=game_win_rate
dic['rank']=rank
dic['insert_time']=insert_time
df=pd.DataFrame(dic)
# print(df['team_id'][:5])
pd.io.sql.to_sql(df, 'lpl_overview', create_engine('mysql+pymysql://'user_name':'DB_name'@'password'@'IP地址'/'DB_name'?charset=user_nameutf8mb4'),if_exists='append')
#Pick数据
def lpl_most_pick(self):
tour = 69,
url = 'https://data.pentaq.com/business_api/2018mar/tournament_most_pick?tour=69&team=&patch=&begin=&end=&duty=&born_color='
headers = {
'Cookie':'抓取你的cookie',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'Host': 'data.pentaq.com',
'Referer': 'https://data.pentaq.com/?tour=69',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
}
res = requests.get(url, headers)
# print(res.text)
js = json.loads(res.text)['heros']
# print(js)
title=[];logo=[];pick_time=[];win_time=[];duty_ids=[];win_rate=[]
for i in js:
# print(i['duty_ids'][0],'\n')
title.append(i['title'])
logo.append(i['logo'])
pick_time.append(i['pick_time'])
win_time.append(i['win_time'])
duty_ids.append(i['duty_ids'][0])
if i['pick_time']!=0:
win_rate.append(round(i['win_time']/i['pick_time'],3))
else:
win_rate.append(0)
dic_most_pick={}
dic_most_pick['title']=title
dic_most_pick['logo']=logo
dic_most_pick['pick_time']=pick_time
dic_most_pick['win_time']=win_time
dic_most_pick['duty_ids']=duty_ids
dic_most_pick['win_rate']=win_rate
# print(dic_most_pick)
df=pd.DataFrame(dic_most_pick)
# print(df['duty_ids'])
pd.io.sql.to_sql(df, 'lpl_most_pick', create_engine('mysql+pymysql://'user_name':'DB_name'@'password'@'IP地址'/'DB_name'?charset=user_nameutf8mb4'),if_exists='append')
#获取图片,并存储
def load_photos(self):
conn = pymysql.connect(
host='ip',
# port='3306',
user='user_name',
password='password',
db='db_name',
charset='utf8mb4')
cur = conn.cursor()
#测试连接
try:
insert_sqli = 'select distinct team_a_logo from lpl_matches_new'
cur.execute(insert_sqli)
result=cur.fetchall()
# print(result)
except Exception as e:
print("插入数据失败:", e)
else:
# 如果是插入数据, 一定要提交数据, 不然数据库中找不到要插入的数据;
conn.commit()
print("插入数据成功;")
# print(type(result))
for i in result:
# print(i,'\n')
# print(i[0],'\n')
url=i[0]
text=i[0][51:53]
text_new=text.replace('/','',)
f=open('Team id is'+str(text_new)+'.jpg','wb')
# print(text_new)
res=requests.get(url)
# print(res.content)
buf=res.content
f.write(buf)
cur.close()
conn.close()
#比赛数据(分页加载)
def lpl_matches_new(self):
tour = 69,
url1='https://data.pentaq.com/business_api/2018mar/match_list?tour=69&team=&patch=&begin=&end=&page=1'
url = 'https://data.pentaq.com/business_api/2018mar/match_list?tour=69&team=&patch=&begin=&end=&page={}'
headers = {
'Cookie':'抓取你的cookie',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'Host': 'data.pentaq.com',
'Referer': 'https://data.pentaq.com/?tour=69',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
}
res=requests.get(url1,headers)
# print(res.text)
count_matches=json.loads(res.text)['count']
js=json.loads(res.text)['matches']
length=len(js)
print(len(js))
if count_matches%length == 0:
page_size = int(count_matches/length)
else:
page_size = int(count_matches/length) + 1
print(page_size)
id=[];team_a_id=[];team_a_name=[];team_a_logo=[];team_a_win=[];team_b_id=[];team_b_name=[];team_b_logo=[];team_b_win=[];start_at=[];bo=[];lol_version=[];
for page in range(1,page_size+1):
resp = requests.get(url.format(page), headers)
# print(resp.text,'\n')
js=json.loads(resp.text)['matches']
# print(js)
for i in js:
# print(i,'\n')
id.append(i['id'])
team_a_id.append(i['team_a_id'])
team_a_name.append(i['team_a_name'])
team_a_logo.append(i['team_a_logo'])
team_a_win.append(i['team_a_win'])
team_b_id.append(i['team_b_id'])
team_b_name.append(i['team_b_name'])
team_b_logo.append(i['team_b_logo'])
team_b_win.append(i['team_b_win'])
start_at.append(i['start_at'])
bo.append(i['bo'])
lol_version.append(i['lol_version'])
# print(id,'\n')
dic_most_pick={}
dic_most_pick['id']=id
dic_most_pick['team_a_id']=team_a_id
dic_most_pick['team_a_name']=team_a_name
dic_most_pick['team_a_logo']=team_a_logo
dic_most_pick['team_a_win']=team_a_win
dic_most_pick['team_b_id']=team_b_id
dic_most_pick['team_b_name']=team_a_id
dic_most_pick['team_b_logo']=team_a_name
dic_most_pick['team_b_win']=team_a_logo
dic_most_pick['start_at']=team_a_win
dic_most_pick['bo']=team_b_id
dic_most_pick['lol_version']=lol_version
df=pd.DataFrame(dic_most_pick)
print(df)
pd.io.sql.to_sql(df,'lpl_matches_new',create_engine('mysql+pymysql://'user_name':'DB_name'@'password'@'IP地址'/'DB_name'?charset=user_nameutf8mb4'),if_exists='append')```
Team Stats 该模块和Player Stats模块类似
#战队数据
def lpl_team_stats(self):
tour = 69,
url = 'https://data.pentaq.com/business_api/2018may/tournament_team_data?tour=69&patch='
headers = {
'Cookie':'抓取你的cookie',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'Host': 'data.pentaq.com',
'Referer': 'https://data.pentaq.com/?tour=69',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
}
res = requests.get(url, headers)
# print(res.text)
js = json.loads(res.text)['data']['teams_data']
# print(js)
team_name=[];team_full_name=[];appear=[];win=[];win_rate=[];duration_per_game=[];duration_per_win=[];duration_per_lose=[];kill=[];dead=[];bloodiness=[];minions_percent_per_game=[];
first_blood_percent=[];first_tower_percent=[];first_dragon_percent=[];first_baron_nashor_percent=[];dragon_percent=[];baron_nashor_percent=[];rift_hearld_percent=[];ten_minutes_gold_offset_per_game=[];
damage_per_minute=[];tank_per_minute=[];put_eye_per_minute=[];destroy_eye_per_minute=[];buy_true_eye_per_minute=[];lose=[]
for i in js:
# print(i,'\n')
team_name.append(i['team_name'])
team_full_name.append(i['team_full_name'])
appear.append(i['appear'])
win.append(i['win'])
win_rate.append(i['win_rate'])
duration_per_game.append(i['duration_per_game'])
duration_per_win.append(i['duration_per_win'])
duration_per_lose.append(i['duration_per_lose'])
kill.append(i['kill'])
dead.append(i['dead'])
bloodiness.append(i['bloodiness'])
minions_percent_per_game.append(i['minions_percent_per_game'])
first_blood_percent.append(i['first_blood_percent'])
first_tower_percent.append(i['first_tower_percent'])
first_dragon_percent.append(i['first_dragon_percent'])
first_baron_nashor_percent.append(i['first_baron_nashor_percent'])
dragon_percent.append(i['dragon_percent'])
baron_nashor_percent.append(i['baron_nashor_percent'])
rift_hearld_percent.append(i['rift_hearld_percent'])
ten_minutes_gold_offset_per_game.append(i['ten_minutes_gold_offset_per_game'])
damage_per_minute.append(i['damage_per_minute'])
tank_per_minute.append(i['tank_per_minute'])
put_eye_per_minute.append(i['put_eye_per_minute'])
destroy_eye_per_minute.append(i['destroy_eye_per_minute'])
buy_true_eye_per_minute.append(i['buy_true_eye_per_minute'])
lose.append(i['lose'])
dic_most_pick={}
dic_most_pick['team_name']=team_name
dic_most_pick['team_full_name']=team_full_name
dic_most_pick['appear']=appear
dic_most_pick['win']=win
dic_most_pick['win_rate']=win_rate
dic_most_pick['duration_per_game']=duration_per_game
dic_most_pick['duration_per_win']=duration_per_win
dic_most_pick['duration_per_lose']=duration_per_lose
dic_most_pick['kill']=kill
dic_most_pick['dead']=dead
dic_most_pick['bloodiness']=bloodiness
dic_most_pick['minions_percent_per_game']=minions_percent_per_game
dic_most_pick['first_blood_percent']=first_blood_percent
dic_most_pick['first_tower_percent']=first_tower_percent
dic_most_pick['first_dragon_percent']=first_dragon_percent
dic_most_pick['first_baron_nashor_percent']=first_baron_nashor_percent
dic_most_pick['dragon_percent']=dragon_percent
dic_most_pick['baron_nashor_percent']=baron_nashor_percent
dic_most_pick['rift_hearld_percent']=rift_hearld_percent
dic_most_pick['ten_minutes_gold_offset_per_game']=ten_minutes_gold_offset_per_game
dic_most_pick['damage_per_minute']=damage_per_minute
dic_most_pick['tank_per_minute']=tank_per_minute
dic_most_pick['put_eye_per_minute']=put_eye_per_minute
dic_most_pick['destroy_eye_per_minute']=destroy_eye_per_minute
dic_most_pick['buy_true_eye_per_minute']=buy_true_eye_per_minute
dic_most_pick['lose']=lose
df=pd.DataFrame(dic_most_pick)
# print(df[0:])
pd.io.sql.to_sql(df, 'lpl_team_stats', create_engine('mysql+pymysql://'user_name':'DB_name'@'password'@'IP地址'/'DB_name'?charset=user_nameutf8mb4'),if_exists='replace')
调用模块:
if __name__ =='__main__':
# LPL().lpl_player_stats()
# LPL().lpl_overview()
# LPL().lpl_most_pick()
# LPL().lpl_matches()
# LPL().load_photos()
# LPL().lpl_matches_new()
#LPL().lpl_team_stats()
#定时器
# @app.task
# def get_test():
# LPL().lpl_team_stats()
# return '执行成功'
在调用模块注释了定时器,采用的是celery模块,有兴趣的读者可以去官网阅读文档,这里不做解释说明,第一次写csdn文档,代码写的不好,各位看官,请轻喷!