Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check account disk quotas before writing using to_carto #1674

Merged
merged 10 commits into from
Aug 25, 2020
14 changes: 12 additions & 2 deletions cartoframes/auth/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,25 @@ def session(self, session):
"""Set session"""
self._session = session

@property
def me_data(self):
    """Account data fetched from the ``ME_SERVICE`` endpoint.

    The lookup is best-effort: if the request or the decoding of its
    response fails for any reason, the failure is logged at debug level
    and an empty dict is returned instead of raising.

    Returns:
        The decoded JSON response of a ``get`` request to ``ME_SERVICE``
        sent through the API-key auth client, or ``{}`` on failure.
    """
    try:
        return self.get_api_key_auth_client().send(ME_SERVICE, 'get').json()
    except Exception as e:
        # Callers treat missing account data as "unknown", so keep the
        # fallback, but leave a trace instead of hiding the error entirely.
        log.debug('Unable to get the account data from the `me` service: {}'.format(e))
        return {}

@property
def user_id(self):
"""Credentials user ID"""
if not self._user_id:
log.debug('Getting `user_id` for {}'.format(self._username))
api_key_auth_client = self.get_api_key_auth_client()

try:
user_me = api_key_auth_client.send(ME_SERVICE, 'get').json()
user_me = self.me_data()
user_data = user_me.get('user_data')
if user_data:
self._user_id = user_data.get('id')
Expand Down
20 changes: 19 additions & 1 deletion cartoframes/io/carto.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

MAX_UPLOAD_SIZE_BYTES = 2000000000 # 2GB
SAMPLE_ROWS_NUMBER = 100
CSV_TO_CARTO_RATIO = 1.4


@send_metrics('data_downloaded')
Expand Down Expand Up @@ -74,7 +75,8 @@ def read_carto(source, credentials=None, limit=None, retry_times=3, schema=None,

@send_metrics('data_uploaded')
def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col=None, index=False, index_label=None,
cartodbfy=True, log_enabled=True, retry_times=3, max_upload_size=MAX_UPLOAD_SIZE_BYTES):
cartodbfy=True, log_enabled=True, retry_times=3, max_upload_size=MAX_UPLOAD_SIZE_BYTES,
skip_quota_warning=False):
"""Upload a DataFrame to CARTO. The geometry's CRS must be WGS 84 (EPSG:4326) so you can use it on CARTO.

Args:
Expand All @@ -89,6 +91,9 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col
uses the name of the index from the dataframe.
cartodbfy (bool, optional): convert the table to CARTO format. Default True. More info
`here <https://carto.com/developers/sql-api/guides/creating-tables/#create-tables>`.
skip_quota_warning (bool, optional): skip the quota exceeded check and force the upload.
(The upload will still fail if the size of the dataset exceeds the remaining DB quota).
Default is False.
retry_times (int, optional):
Number of time to retry the upload in case it fails. Default is 3.

Expand All @@ -114,6 +119,19 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col

context_manager = ContextManager(credentials)

if not skip_quota_warning:
me_data = context_manager.credentials.me_data
if me_data is not None and me_data.get('user_data'):
n = min(SAMPLE_ROWS_NUMBER, len(dataframe))
estimated_byte_size = len(dataframe.sample(n=n).to_csv(header=False)) * len(dataframe) \
/ n / CSV_TO_CARTO_RATIO
remaining_byte_quota = me_data.get('user_data').get('remaining_byte_quota')

if remaining_byte_quota is not None and estimated_byte_size > remaining_byte_quota:
raise CartoException('DB Quota will be exceeded. '
'The remaining quota is {} bytes and the dataset size is {} bytes.'.format(
remaining_byte_quota, estimated_byte_size))

gdf = GeoDataFrame(dataframe, copy=True)

if index:
Expand Down
49 changes: 49 additions & 0 deletions tests/unit/io/test_carto.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from shapely.geometry.base import BaseGeometry
from shapely import wkt

from carto.exceptions import CartoException
from cartoframes.auth import Credentials
from cartoframes.io.managers.context_manager import ContextManager
from cartoframes.io.carto import read_carto, to_carto, copy_table, create_table_from_query
Expand Down Expand Up @@ -249,6 +250,54 @@ def test_to_carto(mocker):
assert norm_table_name == table_name


def test_to_carto_quota_warning(mocker):
    """to_carto raises CartoException when the remaining byte quota is 0."""

    class NoQuotaCredentials(Credentials):
        """Credentials whose `me_data` reports no remaining DB quota."""

        @property
        def me_data(self):
            user_data = {'remaining_byte_quota': 0}
            return {'user_data': user_data}

    # Given: a one-row dataframe and a patched upload method
    table_name = '__table_name__'
    mocker.patch.object(ContextManager, 'copy_from', return_value=table_name)
    origin = Point([0, 0])
    df = GeoDataFrame({'geometry': [origin]})

    # Then: the quota check aborts the upload
    with pytest.raises(CartoException):
        to_carto(df, table_name, NoQuotaCredentials('fake_user', 'fake_api_key'))


def test_to_carto_quota_warning_skip(mocker):
    """With skip_quota_warning=True the upload proceeds despite a 0 byte quota."""

    class NoQuotaCredentials(Credentials):
        """Credentials whose `me_data` reports no remaining DB quota."""

        @property
        def me_data(self):
            user_data = {'remaining_byte_quota': 0}
            return {'user_data': user_data}

    # Given: a one-row dataframe and a patched upload method
    table_name = '__table_name__'
    cm_mock = mocker.patch.object(ContextManager, 'copy_from')
    cm_mock.return_value = table_name
    origin = Point([0, 0])
    df = GeoDataFrame({'geometry': [origin]})
    credentials = NoQuotaCredentials('fake_user', 'fake_api_key')

    # When: the quota check is explicitly bypassed
    norm_table_name = to_carto(df, table_name, credentials, skip_quota_warning=True)

    # Then: copy_from received the table name, the default `if_exists`
    # policy and the default `cartodbfy` flag as positional arguments
    positional_args = cm_mock.call_args[0]
    assert positional_args[1] == table_name
    assert positional_args[2] == 'fail'
    assert positional_args[3] is True
    assert norm_table_name == table_name


def test_to_carto_chunks(mocker):
# Given
table_name = '__table_name__'
Expand Down