From a899d721b14da9da311377023019e11de7a11f3c Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Wed, 19 Aug 2020 11:53:45 +0200 Subject: [PATCH 1/8] Check account disk quotas before writing using to_carto --- cartoframes/auth/credentials.py | 8 ++++++-- cartoframes/io/carto.py | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cartoframes/auth/credentials.py b/cartoframes/auth/credentials.py index 9cd61ba68..29bbc39a9 100644 --- a/cartoframes/auth/credentials.py +++ b/cartoframes/auth/credentials.py @@ -109,15 +109,19 @@ def session(self, session): """Set session""" self._session = session + @property + def me_data(self): + api_key_auth_client = self.get_api_key_auth_client() + return api_key_auth_client.send(ME_SERVICE, 'get').json() + @property def user_id(self): """Credentials user ID""" if not self._user_id: log.debug('Getting `user_id` for {}'.format(self._username)) - api_key_auth_client = self.get_api_key_auth_client() try: - user_me = api_key_auth_client.send(ME_SERVICE, 'get').json() + user_me = self.me_data() user_data = user_me.get('user_data') if user_data: self._user_id = user_data.get('id') diff --git a/cartoframes/io/carto.py b/cartoframes/io/carto.py index 020d9a433..5ff7b203c 100644 --- a/cartoframes/io/carto.py +++ b/cartoframes/io/carto.py @@ -108,6 +108,13 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col context_manager = ContextManager(credentials) + dataframe_size = dataframe.memory_usage(index=False, deep=True).sum() + remaining_byte_quota = context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') + if remaining_byte_quota is not None and dataframe_size > remaining_byte_quota: + raise CartoException('DB Quota will be exceeded. 
' + 'The remaining quota is {} bytes and the dataset size is {} bytes.'.format( + remaining_byte_quota, dataframe_size)) + gdf = GeoDataFrame(dataframe, copy=True) if index: From 084b6eb15fef21471ea5aee962d3fa6ccf21cd10 Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Wed, 19 Aug 2020 12:21:33 +0200 Subject: [PATCH 2/8] Fixing tests --- cartoframes/auth/credentials.py | 10 ++++++++-- cartoframes/io/carto.py | 14 ++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/cartoframes/auth/credentials.py b/cartoframes/auth/credentials.py index 29bbc39a9..c02cc76d0 100644 --- a/cartoframes/auth/credentials.py +++ b/cartoframes/auth/credentials.py @@ -111,8 +111,14 @@ def session(self, session): @property def me_data(self): - api_key_auth_client = self.get_api_key_auth_client() - return api_key_auth_client.send(ME_SERVICE, 'get').json() + me_data = {} + + try: + me_data = self.get_api_key_auth_client().send(ME_SERVICE, 'get').json() + except Exception: + pass + + return me_data @property def user_id(self): diff --git a/cartoframes/io/carto.py b/cartoframes/io/carto.py index 5ff7b203c..74dc198e3 100644 --- a/cartoframes/io/carto.py +++ b/cartoframes/io/carto.py @@ -108,12 +108,14 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col context_manager = ContextManager(credentials) - dataframe_size = dataframe.memory_usage(index=False, deep=True).sum() - remaining_byte_quota = context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') - if remaining_byte_quota is not None and dataframe_size > remaining_byte_quota: - raise CartoException('DB Quota will be exceeded. 
' - 'The remaining quota is {} bytes and the dataset size is {} bytes.'.format( - remaining_byte_quota, dataframe_size)) + if context_manager.credentials.me_data is not None and context_manager.credentials.me_data.get('user_data'): + dataframe_size = dataframe.memory_usage(index=False, deep=True).sum() + remaining_byte_quota = context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') + + if remaining_byte_quota is not None and dataframe_size > remaining_byte_quota: + raise CartoException('DB Quota will be exceeded. ' + 'The remaining quota is {} bytes and the dataset size is {} bytes.'.format( + remaining_byte_quota, dataframe_size)) gdf = GeoDataFrame(dataframe, copy=True) From fd60f3b320cae0b74930979b29f984bd6bfc95bb Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Thu, 20 Aug 2020 11:01:29 +0200 Subject: [PATCH 3/8] Added ignore_quota_warning parameter --- cartoframes/io/carto.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cartoframes/io/carto.py b/cartoframes/io/carto.py index 74dc198e3..a3c7b9383 100644 --- a/cartoframes/io/carto.py +++ b/cartoframes/io/carto.py @@ -70,7 +70,7 @@ def read_carto(source, credentials=None, limit=None, retry_times=3, schema=None, @send_metrics('data_uploaded') def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col=None, index=False, index_label=None, - cartodbfy=True, log_enabled=True): + cartodbfy=True, log_enabled=True, ignore_quota_warning=False): """Upload a DataFrame to CARTO. The geometry's CRS must be WGS 84 (EPSG:4326) so you can use it on CARTO. Args: @@ -85,6 +85,9 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col uses the name of the index from the dataframe. cartodbfy (bool, optional): convert the table to CARTO format. Default True. More info `here `. + ignore_quota_warning (bool, optional): ignore the warning of the possible quota exceeded + and force the upload. 
+ (The upload will still fail if the size of the dataset exceeds the remaining DB quota). Returns: string: the table name normalized. @@ -108,14 +111,15 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col context_manager = ContextManager(credentials) - if context_manager.credentials.me_data is not None and context_manager.credentials.me_data.get('user_data'): - dataframe_size = dataframe.memory_usage(index=False, deep=True).sum() - remaining_byte_quota = context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') + if not ignore_quota_warning: + if context_manager.credentials.me_data is not None and context_manager.credentials.me_data.get('user_data'): + dataframe_size = len(dataframe.to_csv()) + remaining_byte_quota = context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') - if remaining_byte_quota is not None and dataframe_size > remaining_byte_quota: - raise CartoException('DB Quota will be exceeded. ' - 'The remaining quota is {} bytes and the dataset size is {} bytes.'.format( - remaining_byte_quota, dataframe_size)) + if remaining_byte_quota is not None and dataframe_size > remaining_byte_quota: + raise CartoException('DB Quota will be exceeded. 
' + 'The remaining quota is {} bytes and the dataset size is {} bytes.'.format( + remaining_byte_quota, dataframe_size)) gdf = GeoDataFrame(dataframe, copy=True) From 6beccb543dca73b20bb6ae5e208d78e8716bd321 Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Mon, 24 Aug 2020 12:07:14 +0200 Subject: [PATCH 4/8] Fix tests --- tests/unit/viz/test_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/viz/test_source.py b/tests/unit/viz/test_source.py index b164a454b..50b7845bb 100644 --- a/tests/unit/viz/test_source.py +++ b/tests/unit/viz/test_source.py @@ -77,7 +77,7 @@ "type": "Feature", "geometry": { "type": "GeometryCollection", - "coordinates": None + "geometries": [] }, "properties": {} } From de6b944b42b600d4561ff5fdd4808d65ac12e185 Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Mon, 24 Aug 2020 13:11:05 +0200 Subject: [PATCH 5/8] Get the size using the length of the CSV of a sample and using a conversion ratio --- cartoframes/io/carto.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cartoframes/io/carto.py b/cartoframes/io/carto.py index a3c7b9383..5030839de 100644 --- a/cartoframes/io/carto.py +++ b/cartoframes/io/carto.py @@ -15,6 +15,9 @@ GEOM_COLUMN_NAME = 'the_geom' IF_EXISTS_OPTIONS = ['fail', 'replace', 'append'] +SAMPLE_ROWS_NUMBER = 100 +CSV_TO_CARTO_RATIO = 1.4 + @send_metrics('data_downloaded') def read_carto(source, credentials=None, limit=None, retry_times=3, schema=None, index_col=None, decode_geom=True, @@ -113,7 +116,8 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col if not ignore_quota_warning: if context_manager.credentials.me_data is not None and context_manager.credentials.me_data.get('user_data'): - dataframe_size = len(dataframe.to_csv()) + n = min(SAMPLE_ROWS_NUMBER, len(dataframe)) + dataframe_size = len(dataframe.sample(n=n).to_csv(header=False)) * len(dataframe) / n / CSV_TO_CARTO_RATIO remaining_byte_quota = 
context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') if remaining_byte_quota is not None and dataframe_size > remaining_byte_quota: From 12fa4d5a448b2e7c85154b0aabbe91850ac2286f Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Mon, 24 Aug 2020 13:38:30 +0200 Subject: [PATCH 6/8] Change parameter name. Improved doc, Improved code legibility --- cartoframes/io/carto.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cartoframes/io/carto.py b/cartoframes/io/carto.py index d0091aa8d..5995ad7e0 100644 --- a/cartoframes/io/carto.py +++ b/cartoframes/io/carto.py @@ -76,7 +76,7 @@ def read_carto(source, credentials=None, limit=None, retry_times=3, schema=None, @send_metrics('data_uploaded') def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col=None, index=False, index_label=None, cartodbfy=True, log_enabled=True, retry_times=3, max_upload_size=MAX_UPLOAD_SIZE_BYTES, - ignore_quota_warning=False): + skip_quota_warning=False): """Upload a DataFrame to CARTO. The geometry's CRS must be WGS 84 (EPSG:4326) so you can use it on CARTO. Args: @@ -91,9 +91,9 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col uses the name of the index from the dataframe. cartodbfy (bool, optional): convert the table to CARTO format. Default True. More info `here `. - ignore_quota_warning (bool, optional): ignore the warning of the possible quota exceeded - and force the upload. + skip_quota_warning (bool, optional): skip the quota exceeded check and force the upload. (The upload will still fail if the size of the dataset exceeds the remaining DB quota). + Default is False. retry_times (int, optional): Number of time to retry the upload in case it fails. Default is 3. 
@@ -119,16 +119,18 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col context_manager = ContextManager(credentials) - if not ignore_quota_warning: - if context_manager.credentials.me_data is not None and context_manager.credentials.me_data.get('user_data'): + if not skip_quota_warning: + me_data = context_manager.credentials.me_data + if me_data is not None and me_data.get('user_data'): n = min(SAMPLE_ROWS_NUMBER, len(dataframe)) - dataframe_size = len(dataframe.sample(n=n).to_csv(header=False)) * len(dataframe) / n / CSV_TO_CARTO_RATIO + estimated_byte_size = len(dataframe.sample(n=n).to_csv(header=False)) * len(dataframe) \ + / n / CSV_TO_CARTO_RATIO remaining_byte_quota = context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') - if remaining_byte_quota is not None and dataframe_size > remaining_byte_quota: + if remaining_byte_quota is not None and estimated_byte_size > remaining_byte_quota: raise CartoException('DB Quota will be exceeded. 
' 'The remaining quota is {} bytes and the dataset size is {} bytes.'.format( - remaining_byte_quota, dataframe_size)) + remaining_byte_quota, estimated_byte_size)) gdf = GeoDataFrame(dataframe, copy=True) From 214a79c22576a7a84be31b432417126a12d864cd Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Mon, 24 Aug 2020 14:17:42 +0200 Subject: [PATCH 7/8] Test skip_quota_warning --- cartoframes/io/carto.py | 2 +- tests/unit/io/test_carto.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/cartoframes/io/carto.py b/cartoframes/io/carto.py index 5995ad7e0..9575eb2a9 100644 --- a/cartoframes/io/carto.py +++ b/cartoframes/io/carto.py @@ -125,7 +125,7 @@ def to_carto(dataframe, table_name, credentials=None, if_exists='fail', geom_col n = min(SAMPLE_ROWS_NUMBER, len(dataframe)) estimated_byte_size = len(dataframe.sample(n=n).to_csv(header=False)) * len(dataframe) \ / n / CSV_TO_CARTO_RATIO - remaining_byte_quota = context_manager.credentials.me_data.get('user_data').get('remaining_byte_quota') + remaining_byte_quota = me_data.get('user_data').get('remaining_byte_quota') if remaining_byte_quota is not None and estimated_byte_size > remaining_byte_quota: raise CartoException('DB Quota will be exceeded. 
' diff --git a/tests/unit/io/test_carto.py b/tests/unit/io/test_carto.py index 2c9cd83fa..e68e78a2d 100644 --- a/tests/unit/io/test_carto.py +++ b/tests/unit/io/test_carto.py @@ -8,6 +8,7 @@ from shapely.geometry.base import BaseGeometry from shapely import wkt +from carto.exceptions import CartoException from cartoframes.auth import Credentials from cartoframes.io.managers.context_manager import ContextManager from cartoframes.io.carto import read_carto, to_carto, copy_table, create_table_from_query @@ -249,6 +250,28 @@ def test_to_carto(mocker): assert norm_table_name == table_name +def test_to_carto_quota_warning(mocker): + class NoQuotaCredentials(Credentials): + @property + def me_data(self): + return { + 'user_data': { + 'remaining_byte_quota': 0 + } + } + + # Given + table_name = '__table_name__' + cm_mock = mocker.patch.object(ContextManager, 'copy_from') + cm_mock.return_value = table_name + + df = GeoDataFrame({'geometry': [Point([0, 0])]}) + + # When + with pytest.raises(CartoException): + to_carto(df, table_name, NoQuotaCredentials('fake_user', 'fake_api_key')) + + def test_to_carto_chunks(mocker): # Given table_name = '__table_name__' From 4c8a751104011b2816dc33659bf262f37ca51cd1 Mon Sep 17 00:00:00 2001 From: antoniocarlon Date: Mon, 24 Aug 2020 14:20:17 +0200 Subject: [PATCH 8/8] Added test --- tests/unit/io/test_carto.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/tests/unit/io/test_carto.py b/tests/unit/io/test_carto.py index e68e78a2d..ae642f494 100644 --- a/tests/unit/io/test_carto.py +++ b/tests/unit/io/test_carto.py @@ -264,14 +264,40 @@ def me_data(self): table_name = '__table_name__' cm_mock = mocker.patch.object(ContextManager, 'copy_from') cm_mock.return_value = table_name - df = GeoDataFrame({'geometry': [Point([0, 0])]}) - # When + # Then with pytest.raises(CartoException): to_carto(df, table_name, NoQuotaCredentials('fake_user', 'fake_api_key')) +def 
test_to_carto_quota_warning_skip(mocker): + class NoQuotaCredentials(Credentials): + @property + def me_data(self): + return { + 'user_data': { + 'remaining_byte_quota': 0 + } + } + + # Given + table_name = '__table_name__' + cm_mock = mocker.patch.object(ContextManager, 'copy_from') + cm_mock.return_value = table_name + df = GeoDataFrame({'geometry': [Point([0, 0])]}) + + # When + norm_table_name = to_carto(df, table_name, NoQuotaCredentials('fake_user', 'fake_api_key'), + skip_quota_warning=True) + + # Then + assert cm_mock.call_args[0][1] == table_name + assert cm_mock.call_args[0][2] == 'fail' + assert cm_mock.call_args[0][3] is True + assert norm_table_name == table_name + + def test_to_carto_chunks(mocker): # Given table_name = '__table_name__'