forked from gogogoutham/coinmarketcap-scraper
-
Notifications
You must be signed in to change notification settings - Fork 42
/
coinmarketcap.py
121 lines (93 loc) · 3.61 KB
/
coinmarketcap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
""" Module for requesting data from coinmarketcap.org and parsing it. """
from datetime import datetime
import json
import logging
import lxml.html
from random import random
import requests
import time
from future.utils import iteritems
# Base URL of the main site; requestList builds its list-page URLs from it.
baseUrl = "http://coinmarketcap.com"
# Base URL of the graph-data host; requestMarketCap builds its URLs from it.
graphBaseUrl = "http://graphs2.coinmarketcap.com" # CoinMarketCap endpoint changed from graphs to graphs2
# Running total of requests issued through _request (incremented after each one).
countRequested = 0
# Throttle window in seconds: a request made sooner than this after the previous
# one makes _request sleep for a randomised delay first.
interReqTime = 20
# time.time() value recorded after the most recent request; None until the first one.
lastReqTime = None
def _request(target, timeout=60):
    """Private method for requesting an arbitrary URL with throttling.

    If the previous request finished less than ``interReqTime`` seconds ago,
    sleep for a randomised delay (between zero and twice the remaining time)
    before issuing the new request.

    Args:
        target: Full URL to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get`` so a stalled connection cannot
            block the scraper forever.

    Returns:
        The response body as text.

    Raises:
        Exception: If the response status code is not OK.
        requests.exceptions.RequestException: Propagated from ``requests``
            on connection errors or timeouts.
    """
    global countRequested
    global lastReqTime
    if lastReqTime is not None:
        # Read the clock once: re-reading it after the comparison could yield
        # a slightly negative remainder, and time.sleep() rejects negatives.
        remaining = interReqTime - (time.time() - lastReqTime)
        if remaining > 0:
            timeToSleep = random() * remaining * 2
            logging.info("Sleeping for {0} seconds before request.".format(timeToSleep))
            time.sleep(timeToSleep)
    logging.info("Issuing request for the following target: {0}".format(target))
    r = requests.get(target, timeout=timeout)
    lastReqTime = time.time()
    countRequested += 1
    if r.status_code == requests.codes.ok:
        return r.text
    raise Exception("Could not process request. "
                    "Received status code {0}.".format(r.status_code))
def requestList(type, view):
    """Request a list page of all coins or all tokens.

    Args:
        type: Either ``"tokens"`` or ``"coins"``.
        view: Name of the list view, inserted into the URL path.

    Returns:
        The response body as text, as returned by ``_request``.

    Raises:
        ValueError: If ``type`` is neither ``"tokens"`` nor ``"coins"``.
    """
    # The former ``assert(cond, msg)`` asserted a non-empty tuple, which is
    # always true, so invalid types were never rejected. Validate explicitly
    # (this also keeps working when Python runs with -O).
    if type not in ("tokens", "coins"):
        raise ValueError("Can only request tokens or coins")
    return _request("{0}/{1}/views/{2}/".format(baseUrl, type, view))
def requestMarketCap(slug):
    """Fetch the market cap data for the currency identified by ``slug``.

    The URL is built from the graph host base URL and the slug; the return
    value is whatever ``_request`` returns for that URL.
    """
    url = "{0}/currencies/{1}/".format(graphBaseUrl, slug)
    return _request(url)
def parseList(html, type):
    """Parse the page returned by requestList for view 'all'.

    Args:
        html: HTML text of the list page.
        type: Accepted for interface compatibility; not used by the parser.

    Returns:
        A list with one dict per table body row, holding the keys
        ``name``, ``slug``, ``symbol`` and ``explorer_link`` (an empty
        string when the row has no such link).
    """
    tree = lxml.html.fromstring(html)
    parsed = []
    for tr in tree.cssselect("table > tbody > tr"):
        cells = tr.cssselect("td")

        # Second cell: the first link's text is the name, its href carries the slug.
        link = cells[1].cssselect("a")[0]
        name = link.text_content().strip()
        href = link.attrib['href']
        slug = href.replace('/currencies/', '').replace('/', '').strip()

        # Third cell: ticker symbol.
        symbol = cells[2].text_content().strip()

        # Sixth cell: may contain a link, which is kept as the explorer link.
        anchors = cells[5].cssselect("a")
        explorer = anchors[0].attrib['href'] if anchors else ''

        parsed.append({
            'name': name,
            'slug': slug,
            'symbol': symbol,
            'explorer_link': explorer,
        })
    return parsed
def parseMarketCap(jsonDump, slug):
    """Convert a market cap JSON dump into a time-ordered list of row dicts.

    The dump is parsed as a JSON object mapping series names to lists of
    ``[timestamp_in_milliseconds, value]`` pairs. The series are pivoted
    into one dict per distinct timestamp (wide format).

    Args:
        jsonDump: JSON text, e.g. as returned by requestMarketCap.
        slug: Currency slug stored on every returned row.

    Returns:
        A list of dicts sorted by timestamp. Each dict holds one entry per
        series plus ``slug``, ``time`` (naive UTC ``datetime``) and
        ``est_available_supply`` (market cap divided by USD price, or
        ``None`` when either value is missing or the price is zero).
    """
    rawData = json.loads(jsonDump)

    # Column names used to pre-fill each row with None.
    # NOTE(review): the '_data' suffix is stripped here but values are stored
    # under the unstripped series name below; confirm which naming the
    # consumers of these rows expect.
    targetFields = [str(key.replace('_data', '')) for key in rawData]

    # Convert data in document to wide format, keyed by timestamp in seconds.
    dataIntermediate = {}
    for field, fieldData in rawData.items():
        for row in fieldData:
            timestamp = int(row[0]/1000)
            if timestamp not in dataIntermediate:
                dataIntermediate[timestamp] = dict.fromkeys(targetFields)
            dataIntermediate[timestamp][field] = row[1]

    # Generate derived data & alter format.
    data = []
    for timestamp in sorted(dataIntermediate):
        datum = dataIntermediate[timestamp]
        datum['slug'] = slug
        datum['time'] = datetime.utcfromtimestamp(timestamp)
        marketCap = datum.get('market_cap_by_available_supply')
        priceUsd = datum.get('price_usd')
        # Compare by value: an identity test against the literal 0 lets a
        # float 0.0 through and then divides by zero.
        if marketCap is not None and priceUsd is not None and priceUsd != 0:
            datum['est_available_supply'] = float(marketCap / priceUsd)
        else:
            datum['est_available_supply'] = None
        data.append(datum)
    return data