-
Notifications
You must be signed in to change notification settings - Fork 64
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rtorre/ch56013/client for stream download of bq datasets #1512
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
from .geocoding import Geocoding | ||
from .isolines import Isolines | ||
from .bq_datasets import BQUserDataset | ||
|
||
__all__ = [ | ||
'Geocoding', | ||
'Isolines' | ||
'Isolines', | ||
'BQUserDataset', | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import requests | ||
from carto.utils import ResponseStream | ||
# from carto.auth import APIKeyAuthClient | ||
|
||
from carto.exceptions import CartoException | ||
|
||
# TODO: this shouldn't be hardcoded | ||
DO_ENRICHMENT_API_URL = 'http://localhost:7070/bq' | ||
|
||
|
||
class BQDataset:
    """Client that downloads one dataset from the DO enrichment API.

    The dataset is fetched with a streamed GET request against
    ``<base_url>/datasets/<name>``, authenticated through an ``api_key``
    query parameter.
    """

    def __init__(self, name_id, api_key='my_valid_api_key', session=None,
                 base_url=None):
        """Store the dataset name and the connection settings.

        Args:
            name_id: Identifier of the dataset; appended verbatim to the
                request URL.
            api_key: Key sent as the ``api_key`` query parameter. The default
                is the placeholder the class used before this became
                configurable.
            session: Optional ``requests.Session`` to reuse. A new one is
                created when omitted.
            base_url: Optional API root. Falls back to
                ``DO_ENRICHMENT_API_URL`` when omitted.
        """
        self.name = name_id
        # TODO: obtain the session and API key from a proper auth client.
        self.session = session if session is not None else requests.Session()
        self.api_key = api_key
        self.base_url = (base_url if base_url is not None
                         else DO_ENRICHMENT_API_URL)

    @staticmethod
    def _client_error_reason(response):
        """Return the first API error message, or the HTTP reason phrase."""
        try:
            return response.json()['error'][0]
        except (ValueError, KeyError, IndexError, TypeError):
            # Body is not JSON or lacks the expected {'error': [...]} shape.
            return response.reason

    def download(self):
        """Request the dataset and return the streamed response.

        Returns:
            The ``requests`` response, opened with ``stream=True`` so the
            body has not been read yet.

        Raises:
            CartoException: On any request failure. For 4xx responses the
                message is ``'<status> Client Error: <reason>'``.
        """
        url = self.base_url + '/datasets/' + self.name
        params = {'api_key': self.api_key}

        # Pre-bind so the handler below never hits an unbound name if the
        # HTTPError is raised before the assignment completes.
        response = None
        try:
            response = self.session.get(url,
                                        params=params,
                                        stream=True)
            response.raise_for_status()
        except requests.HTTPError as e:
            error = e
            if response is not None:
                if 400 <= response.status_code < 500:
                    # Client error, provide better reason
                    reason = self._client_error_reason(response)
                    error = u'%s Client Error: %s' % (response.status_code,
                                                      reason)
                # The failed streamed response is not returned to the
                # caller, so release its connection here.
                response.close()
            raise CartoException(error)
        except Exception as e:
            raise CartoException(e)

        return response

    def download_stream(self):
        """Download the dataset and wrap the response in a ResponseStream."""
        return ResponseStream(self.download())
|
||
|
||
class BQUserDataset: | ||
|
||
@staticmethod | ||
def name(name_id): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right now, in CF we are based on functions instead of classes, but in So, in here in CF I would spec something like:
but for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We talked offline about the python api. To summarize and for the record: On the one hand, this Let's do the following: let's stick to the proposed python api (private, undocumented), complete a full iteration (putting all the components together to work), then let's consider possible code reorgs or alternate API's. |
||
return BQDataset(name_id) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import unittest | ||
import pandas | ||
import geopandas | ||
from shapely import wkt | ||
|
||
from cartoframes.data.services import BQUserDataset | ||
|
||
|
||
EXPECTED_CSV_SAMPLE = """state_fips_code,county_fips_code,geo_id,tract_name,internal_point_geo | ||
60,10,60010950100,9501.0,POINT (-170.5618796 -14.2587411) | ||
60,10,60010950200,9502.0,POINT (-170.5589852 -14.2859572) | ||
60,10,60010950300,9503.0,POINT (-170.6310985 -14.2760947) | ||
60,10,60010950500,9505.0,POINT (-170.6651925 -14.2713653) | ||
60,10,60010950600,9506.0,POINT (-170.701028 -14.252446) | ||
""" | ||
|
||
|
||
class TestBQDataset(unittest.TestCase):
    """Checks that a dataset obtained via BQUserDataset loads into pandas."""

    def test_can_download_to_dataframe(self):
        """Stream a dataset into a DataFrame and compare its first rows.

        The download stream is parsed with ``pandas.read_csv``; the overall
        shape is asserted, then a five-column projection of the head rows is
        turned into a GeoDataFrame and compared against the expected CSV.
        """
        dataset = BQUserDataset.name('census_tracts_american_samoa')
        df = pandas.read_csv(dataset.download_stream())

        expected_shape = (18, 13)
        self.assertEqual(df.shape, expected_shape)

        # do some checks on the contents
        sample_columns = (
            'state_fips_code',
            'county_fips_code',
            'geo_id',
            'tract_name',
            'internal_point_geo'
        )
        sample = pandas.DataFrame(df.head(), columns=sample_columns)

        # Parse the WKT text into geometry objects before building the
        # GeoDataFrame on that column.
        parsed_points = df['internal_point_geo'].apply(wkt.loads)
        sample['internal_point_geo'] = parsed_points
        geosample = geopandas.GeoDataFrame(sample,
                                           geometry='internal_point_geo')

        actual_csv = geosample.to_csv(index=False)
        self.assertEqual(actual_csv, EXPECTED_CSV_SAMPLE)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not very important since we are going to move it to another repo, but I would move
`BQUserDataset`
to the `clients` folder. The idea of this one is to be used for data services.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. I wrongly took it as "services offered" to CF's users.