-
Notifications
You must be signed in to change notification settings - Fork 0
/
collect_data.py
72 lines (64 loc) · 1.69 KB
/
collect_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import asyncio
import aiohttp
from understat import Understat
import pandas as pd
import warnings
# Silence every Python warning so the terminal output is easier to review.
warnings.filterwarnings("ignore")

# League keys that are scraped, one entry per competition.
leagues = ["epl", "la_liga", "bundesliga", "serie_a", "ligue_1"]

# Shot rows accumulated across every league/season that was fetched.
database = []
async def get_xg(league, year):
    """Collect one row per shot for every match of a league season.

    Args:
        league: League key forwarded to ``Understat.get_league_results``.
        year: Season forwarded to ``Understat.get_league_results``.

    Returns:
        A list of rows, each laid out as
        ``[X, Y, situation, shotType, lastAction, result, match id, xG,
        league, year]``. The match id, league and year are carried along
        for debugging.
    """
    async with aiohttp.ClientSession() as session:
        client = Understat(session)

        # Resolve every match id of the season before fetching any shots.
        results = await client.get_league_results(league, year)
        match_ids = [match['id'] for match in results]

        rows = []
        # Matches are fetched strictly one after another.
        for match_id in match_ids:
            shots_by_side = await client.get_match_shots(match_id)
            # 'a' is read before 'h' (presumably away/home - TODO confirm).
            for side in ('a', 'h'):
                for shot in shots_by_side[side]:
                    rows.append([
                        shot['X'],
                        shot['Y'],
                        shot['situation'],
                        shot['shotType'],
                        shot['lastAction'],
                        shot['result'],
                        match_id,  # for debugging
                        shot['xG'],
                        league,  # for debugging
                        year,  # for debugging
                    ])
    return rows
def export_csv(data):
    """Write the collected shot rows to ``data.csv`` in the working directory.

    Args:
        data: Iterable of 10-element rows ordered as
            ``X, Y, situation, shotType, lastAction, result, id, xG,
            league, year``. May be empty.

    The file is overwritten on every call and is written without the
    DataFrame index. An empty ``data`` produces a header-only CSV.
    """
    columns = [
        'X',
        'Y',
        'situation',
        'shotType',
        'lastAction',
        'result',
        'id',
        'xG',
        'league',
        'year',
    ]
    # Passing the names to the constructor (instead of assigning
    # ``df.columns`` afterwards) keeps this working when ``data`` is empty:
    # a frame built from no rows has zero columns, so assigning ten names
    # to it raises a length-mismatch ValueError.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv('data.csv', index=False)
# Create the event loop explicitly: asyncio.get_event_loop() without a
# running loop is deprecated and raises RuntimeError on newer Python versions.
loop = asyncio.new_event_loop()
try:
    # Scrape each league/season one at a time. A failing season is reported
    # and skipped so one bad request does not abort the whole run.
    for league in leagues:
        for year in range(2014, 2023):
            try:
                data = loop.run_until_complete(get_xg(league, year))
            except Exception as exc:
                # Catch Exception rather than using a bare except so that
                # Ctrl-C (KeyboardInterrupt) and SystemExit can still stop
                # the script; also show why the season failed.
                print("FAILED", league, year, repr(exc))
            else:
                database.extend(data)
                print("COMPLETED", league, year)
finally:
    # Release the loop's resources even when the run is interrupted.
    loop.close()

# Persist everything that was collected successfully.
export_csv(database)