-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsteam_spy.py
124 lines (93 loc) · 3.5 KB
/
steam_spy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import json
from urllib.parse import urlencode

import colorama
import pandas as pd
from colorama import Fore, Style
from requests_futures.sessions import FuturesSession
from tqdm import tqdm

from reviews import convert_types
def construct_request_url(request, params):
    """Build a SteamSpy API URL for the given request type.

    Args:
        request: Value sent as the ``request`` query parameter
            (e.g. ``'appdetails'``).
        params: Mapping of additional query parameters. May be empty or
            ``None``. Values are converted with ``str`` and percent-encoded.

    Returns:
        The complete request URL as a string.
    """
    # The request type is always sent, even when there are no extra
    # parameters; otherwise the URL would not say what is being asked for.
    pairs = [('request', request)]
    if params:
        pairs.extend(params.items())
    return 'https://steamspy.com/api.php?' + urlencode(pairs)
def parse_future(future, df, updated_df):
    """Fold one finished API response into the accumulated results.

    Args:
        future: Completed request future. It must carry an ``index``
            attribute holding the label of the matching row in ``df``.
        df: DataFrame the requests were generated from.
        updated_df: DataFrame of the rows processed so far.

    Returns:
        A DataFrame consisting of ``updated_df`` plus the ``future.index``
        row of ``df`` with the values from the response written into it.
        On a non-200 response an error is printed and ``updated_df`` is
        returned unchanged.
    """
    response = future.result()
    if response.status_code != 200:
        print(Fore.RED + f'Request error: code {response.status_code}')
        return updated_df

    response_dict = json.loads(response.text)

    # Response key -> output column name.
    key_to_col = {
        'positive': 'positive_ratings',
        'negative': 'negative_ratings',
        'owners': 'owners',
        'average_forever': 'average_playtime',
        'median_forever': 'median_playtime',
        'price': 'price',
    }

    # Only keys actually present in the response are carried over.
    relevant = clean_data({
        column: response_dict[key]
        for key, column in key_to_col.items()
        if key in response_dict
    })

    # Build the complete row *before* adding it to the results. Assigning
    # through updated_df.loc[label, ...] after the row has been added would
    # also overwrite any earlier row that shares the same index label.
    new_row = df.loc[[future.index]].copy()
    for column, value in relevant.items():
        new_row[column] = value

    if updated_df.empty:
        return new_row

    # DataFrame.append no longer exists in pandas 2.x; concat replaces it.
    return pd.concat([updated_df, new_row])
def clean_data(data):
    """Normalise the raw ``owners`` and ``price`` values of ``data`` in place.

    * ``owners``: commas are removed and ``' .. '`` becomes ``'-'``
      (``'1,000 .. 2,000'`` -> ``'1000-2000'``).
    * ``price``: converted with ``int`` and divided by 100
      (presumably cents to whole currency units; confirm against the API).

    A key that is absent, or whose value is ``None``, is left untouched
    instead of raising.

    Args:
        data: Dict of column name -> raw value.

    Returns:
        The same dict object, after modification.
    """
    owners = data.get('owners')
    if owners is not None:
        data['owners'] = owners.replace(',', '').replace(' .. ', '-')

    price = data.get('price')
    if price is not None:
        data['price'] = int(price) / 100

    return data
def write_data(df, messages=True, path='data/steam_updated.csv'):
    """Convert column types and save ``df`` as CSV.

    Args:
        df: DataFrame to save. Nothing is done when it is empty.
        messages: When true, print a progress line before each step.
        path: Destination CSV file. Defaults to the location ``main`` reads
            back when resuming.

    Returns:
        None.
    """
    if df.empty:
        # An empty frame has nothing to convert, and saving it would replace
        # the output file with one that holds no data.
        return

    if messages:
        print(Fore.CYAN + '- Performing type conversion...')

    df = convert_types(df, {
        'appid': int,
        'positive_ratings': int,
        'negative_ratings': int,
        'average_playtime': int,
        'median_playtime': int,
        'price': float,
    })

    if messages:
        print(Fore.CYAN + '- Writing updated data to new CSV file...')

    # The index is not written.
    df.to_csv(path, index=False)
def main():
    """Fetch SteamSpy details for the games in data/steam.csv and save them.

    Games whose ``appid`` already appears in data/steam_updated.csv are not
    requested again, so an interrupted run can be resumed. Results are
    written out periodically while responses are processed and once more
    at the end.
    """
    colorama.init()
    print(Fore.MAGENTA + Style.BRIGHT + '\n--- SteamSpy Script ---\n')

    session = FuturesSession(max_workers=10)
    futures = []

    print(Fore.CYAN + '- Reading main Steam data CSV file...')
    df = pd.read_csv('data/steam.csv')

    updated_df = pd.DataFrame()
    existing_appids = set()
    try:
        existing_data = pd.read_csv('data/steam_updated.csv')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No usable output from an earlier run: request every game.
        pass
    else:
        print(Fore.YELLOW + '- Found existing file, skipping API calls for existing games...')
        updated_df = existing_data
        existing_appids = set(existing_data['appid'])

    print(Fore.CYAN + '- Obtaining updated ratings and playtime data from SteamSpy API...\n' + Fore.RESET)

    # Iterate the appid column directly: iterrows() builds a Series per row
    # and may upcast the id (e.g. to float), which would corrupt the URL.
    for index, appid in df['appid'].items():
        if appid in existing_appids:
            continue

        url = construct_request_url('appdetails', {
            'appid': str(appid),
        })
        future = session.get(url)
        # Remember which row of df this request belongs to.
        future.index = index
        futures.append(future)

    for i, future in enumerate(tqdm(futures)):
        updated_df = parse_future(future, df, updated_df)
        # Periodic checkpoint so progress survives an interruption.
        if i % 1000 == 0:
            write_data(updated_df, messages=False)

    print()
    write_data(updated_df)
    print(Fore.GREEN + '\nDone.\n' + Style.RESET_ALL)
# Run the script only when the file is executed directly, not on import.
if __name__ == '__main__':
    main()