get table metadata and manage dataset privacy #691

Merged (29 commits) on May 28, 2019
Commits (29)
8a1ef8c
get table metadata
simon-contreras-deel May 22, 2019
cd5987f
get_privacy and set_privacy definition
simon-contreras-deel May 22, 2019
e376eec
Merge branch 'feature-sharing-viz' into dataset-metadata
simon-contreras-deel May 22, 2019
cf5de85
get privacy and set privacy implementation
simon-contreras-deel May 23, 2019
9ec8dad
privacy tests
simon-contreras-deel May 23, 2019
d1f899a
context and dataset mocks
simon-contreras-deel May 24, 2019
1505128
new test case using mocks
simon-contreras-deel May 24, 2019
b2922c9
details in dataset
simon-contreras-deel May 24, 2019
94aa460
using Mocks. From 28 seconds execution to 1
simon-contreras-deel May 24, 2019
bb4ee97
hound
simon-contreras-deel May 24, 2019
bccc448
ensuring module p2.7
simon-contreras-deel May 24, 2019
8106be1
dataset info
simon-contreras-deel May 24, 2019
447d99e
improving mocks with dataset_info
simon-contreras-deel May 24, 2019
38a0eb1
tests with dataset_info
simon-contreras-deel May 24, 2019
4cba183
hound
simon-contreras-deel May 24, 2019
24ed3b6
honer cr 1
simon-contreras-deel May 27, 2019
f4a4afb
honer cr 2
simon-contreras-deel May 27, 2019
37e48bd
_unsync
simon-contreras-deel May 27, 2019
64e9489
details
simon-contreras-deel May 27, 2019
e610772
hound happy about init files?
simon-contreras-deel May 27, 2019
26845de
fix layer dep
simon-contreras-deel May 27, 2019
c6cff2c
honor CR 3
simon-contreras-deel May 28, 2019
0c2da94
Merge branch 'feature-sharing-viz' into dataset-metadata
simon-contreras-deel May 28, 2019
b799db4
hound
simon-contreras-deel May 28, 2019
a52f120
fix from merge
simon-contreras-deel May 28, 2019
150281a
remove useless returns
simon-contreras-deel May 28, 2019
d8644e5
improve test
simon-contreras-deel May 28, 2019
00880f1
2.7 happy?
simon-contreras-deel May 28, 2019
a8ff4ea
fix merge error in main init
simon-contreras-deel May 28, 2019
Files changed (12)
3 changes: 1 addition & 2 deletions cartoframes/__init__.py
@@ -3,8 +3,7 @@
 from .credentials import Credentials
 from .layer import BaseMap, QueryLayer, Layer
 from .styling import BinMethod
-from .dataset import Dataset
-from .dataset import set_default_context
+from .datasets import Dataset, set_default_context
 from .__version__ import __version__
 
 __all__ = [
2 changes: 1 addition & 1 deletion cartoframes/context.py
@@ -31,7 +31,7 @@
 from .analysis import Table
 from .__version__ import __version__
 from .columns import dtypes, date_columns_names
-from .dataset import Dataset, recursive_read, _decode_geom, get_columns
+from .datasets import Dataset, recursive_read, _decode_geom, get_columns
 
 if sys.version_info >= (3, 0):
     from urllib.parse import urlparse, urlencode
7 changes: 7 additions & 0 deletions cartoframes/datasets/__init__.py
@@ -0,0 +1,7 @@
from .dataset import Dataset, recursive_read, _decode_geom, get_columns, set_default_context
from .dataset_info import DatasetInfo

__all__ = [
'Dataset',
'DatasetInfo'
]
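
Note: the old cartoframes.dataset module becomes the cartoframes.datasets package, and Dataset plus the new DatasetInfo are its public exports. A minimal sketch of the resulting import path (the credentials and table name below are illustrative placeholders, not part of this diff):

# Sketch only: importing from the new cartoframes.datasets package.
# base_url, api_key and the table name are placeholders.
from cartoframes.context import CartoContext
from cartoframes.datasets import Dataset, DatasetInfo

cc = CartoContext(base_url='https://fake_username.carto.com', api_key='fake_api_key')
ds = Dataset.from_table(table_name='my_table', context=cc)
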
41 changes: 32 additions & 9 deletions cartoframes/dataset.py → cartoframes/datasets/dataset.py
@@ -4,10 +4,11 @@
 import time
 from tqdm import tqdm
 
-from .columns import Column, normalize_names, normalize_name
-
 from carto.exceptions import CartoException, CartoRateLimitException
-from .geojson import load_geojson
+
+from ..columns import Column, normalize_names, normalize_name
+from ..geojson import load_geojson
+from .dataset_info import DatasetInfo
 
 # avoid _lock issue: https://github.com/tqdm/tqdm/issues/457
 tqdm(disable=True, total=0)  # initialise internal lock
@@ -26,9 +27,9 @@ class Dataset(object):
     REPLACE = 'replace'
     APPEND = 'append'
 
-    PRIVATE = 'private'
-    PUBLIC = 'public'
-    LINK = 'link'
+    PRIVATE = DatasetInfo.PRIVATE
+    PUBLIC = DatasetInfo.PUBLIC
+    LINK = DatasetInfo.LINK
 
     STATE_LOCAL = 'local'
     STATE_REMOTE = 'remote'
@@ -55,6 +56,7 @@ def __init__(self, table_name=None, schema='public',
         self._cc = context or default_context
         self._state = state
         self._is_saved_in_carto = is_saved_in_carto
+        self._dataset_info = None
 
         self._normalized_column_names = None
 
@@ -91,20 +93,34 @@ def get_dataframe(self):
 
     def set_dataframe(self, df):
         if self._df is None or not self._df.equals(df):
-            self._is_saved_in_carto = False
+            self._unsync()
             self._df = df
 
     def get_geodataframe(self):
         return self._gdf
 
     def set_geodataframe(self, gdf):
         if self._gdf is None or not self._gdf.equals(gdf):
-            self._is_saved_in_carto = False
+            self._unsync()
             self._gdf = gdf
 
     def get_table_name(self):
         return self._table_name
 
+    def get_dataset_info(self):
+        if not self._is_saved_in_carto:
+            raise CartoException('Your data is not synchronized with CARTO.'
+                                 'First of all, you should call upload method to save your data in CARTO.')
+
+        if self._dataset_info is None:
+            self._dataset_info = self._get_dataset_info()
+
+        return self._dataset_info
+
+    def set_dataset_info(self, privacy=None, name=None):
+        self._dataset_info = self.get_dataset_info()
+        self._dataset_info.update(privacy=privacy, name=name)
+
     def upload(self, with_lonlat=None, if_exists=FAIL, table_name=None, schema=None, context=None):
         if table_name:
             self._table_name = normalize_name(table_name)
@@ -168,7 +184,7 @@ def download(self, limit=None, decode_geom=False, retry_times=DEFAULT_RETRY_TIME
     def delete(self):
         if self.exists():
             self._cc.sql_client.send(self._drop_table_query(False))
-            self._is_saved_in_carto = False
+            self._unsync()
             return True
 
         return False
@@ -384,6 +400,13 @@ def _map_geom_type(self, geom_type):
             'MultiPolygon': Dataset.GEOM_TYPE_POLYGON
         }[geom_type]
 
+    def _get_dataset_info(self):
+        return DatasetInfo(self._cc, self._table_name)
+
+    def _unsync(self):
+        self._is_saved_in_carto = False
+        self._dataset_info = None
+
 
 def recursive_read(context, query, retry_times=Dataset.DEFAULT_RETRY_TIMES):
     try:
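
The methods added above are the public entry points for table metadata: get_dataset_info requires the data to be synchronized with CARTO and caches a DatasetInfo instance, set_dataset_info delegates to DatasetInfo.update, and _unsync (called from set_dataframe, set_geodataframe and delete) drops both the sync flag and the cached metadata. A hedged usage sketch, with placeholder credentials and table name, mirroring the tests at the end of this diff:

# Illustrative flow for the new metadata API; names below are placeholders.
import pandas as pd

from cartoframes.context import CartoContext
from cartoframes.datasets import Dataset

cc = CartoContext(base_url='https://fake_username.carto.com', api_key='fake_api_key')

ds = Dataset.from_query(query='SELECT 1', context=cc)
ds.upload(table_name='my_table')             # data is now saved in CARTO

info = ds.get_dataset_info()                 # fetched once, then cached in _dataset_info
print(info.privacy, info.name)

ds.set_dataset_info(privacy=Dataset.PUBLIC)  # validated and saved through DatasetInfo.update()

ds.set_dataframe(pd.DataFrame({'column_name': [1]}))   # un-syncs the dataset again
# ds.get_dataset_info()  # would now raise CartoException until upload() is called again
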
74 changes: 74 additions & 0 deletions cartoframes/datasets/dataset_info.py
@@ -0,0 +1,74 @@
import time
from warnings import warn

from carto.datasets import DatasetManager
from carto.exceptions import CartoException

from ..columns import normalize_name


class DatasetInfo():
PRIVATE = 'PRIVATE'
PUBLIC = 'PUBLIC'
LINK = 'LINK'

def __init__(self, carto_context, table_name):
self._metadata = self._get_metadata(carto_context, table_name)
self.privacy = self._metadata.privacy
self.name = self._metadata.name

def update(self, privacy=None, name=None):
modified = False

if privacy and self._validate_privacy(privacy):
self.privacy = privacy.upper()
modified = True

if name:
normalized_name = normalize_name(name)
if self._validate_name(normalized_name):
self.name = normalized_name
modified = True

if normalized_name != name:
warn('Dataset name will be named `{}`'.format(self.name))

if modified:
self._save_metadata()

def _get_metadata(self, carto_context, table_name, retries=6, retry_wait_time=1):
ds_manager = DatasetManager(carto_context.auth_client)
try:
return ds_manager.get(table_name)
except Exception as e:
if type(e).__name__ == 'NotFoundException' and retries > 0:
# if retry_wait_time > 7: # it should be after more than 15 seconds
# warn('We are still procesing the CARTO table. Sorry for the delay.')
time.sleep(retry_wait_time)
                return self._get_metadata(carto_context=carto_context, table_name=table_name,
                                          retries=retries-1, retry_wait_time=retry_wait_time*2)
else:
raise CartoException('We could not get the table metadata.'
'Please, try again in a few seconds or contact support for help')

def _save_metadata(self):
self._metadata.privacy = self.privacy
self._metadata.name = self.name
self._metadata.save()

def _validate_privacy(self, privacy):
privacy = privacy.upper()
if privacy not in [self.PRIVATE, self.PUBLIC, self.LINK]:
raise ValueError('Wrong privacy. The privacy: {p} is not valid. You can use: {o1}, {o2}, {o3}'.format(
p=privacy, o1=self.PRIVATE, o2=self.PUBLIC, o3=self.LINK))

if privacy != self.privacy:
return True

return False

def _validate_name(self, name):
if name != self.name:
return True

return False
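
_get_metadata retries because a table uploaded moments earlier may not be visible to the Datasets API yet: it sleeps 1, 2, 4, ... seconds, doubling the wait for up to six retries, and then gives up with a CartoException. A standalone sketch of that backoff pattern (the fetch callable is a stand-in, not part of this diff):

# Generic retry-with-exponential-backoff helper illustrating the pattern used by
# DatasetInfo._get_metadata; fetch is any callable that raises until the resource exists.
import time


def get_with_backoff(fetch, retries=6, wait=1):
    for attempt in range(retries + 1):
        try:
            return fetch()
        except Exception:
            if attempt == retries:
                raise            # out of retries: propagate the last error
            time.sleep(wait)     # wait 1, 2, 4, ... seconds between attempts
            wait *= 2
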
2 changes: 1 addition & 1 deletion cartoframes/vis/layer.py
@@ -10,7 +10,7 @@ class Layer(object):
     """Layer
 
     Args:
-        source (str, :py:class:`Dataset <cartoframes.Dataset>`,
+        source (str, :py:class:`Dataset <cartoframes.datasets.Dataset>`,
            :py:class:`Source <cartoframes.vis.Source>`): The source data.
        style (str, dict, :py:class:`Style <cartoframes.vis.Style>`,
            optional): The style of the visualization: `CARTO VL styling
2 changes: 1 addition & 1 deletion cartoframes/vis/source.py
@@ -3,8 +3,8 @@
 import re
 
 from . import defaults
-from ..dataset import Dataset
 from ..geojson import get_encoded_data, get_bounds
+from cartoframes.datasets import Dataset
 
 try:
     import geopandas
Empty file added test/mocks/__init__.py
Empty file.
16 changes: 16 additions & 0 deletions test/mocks/context_mock.py
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-


class CredsMock():
Review thread (resolved):
Contributor: One question. Did you look for a mock library, FactoryGirl style?
Contributor Author: Nope.

def __init__(self, key=None, username=None):
self._key = key
self._username = username

def username(self):
return self._username


class ContextMock():
def __init__(self, username, api_key):
self.is_org = True
self.creds = CredsMock(key=api_key, username=username)
40 changes: 40 additions & 0 deletions test/mocks/dataset_mock.py
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-

import pandas as pd

from cartoframes.datasets import Dataset, DatasetInfo


class MetadataMock():
def __init__(self):
self.privacy = DatasetInfo.PRIVATE
self.name = None


class DatasetInfoMock(DatasetInfo):
def _get_metadata(self, _1, _2):
return MetadataMock()

def _save_metadata(self):
return True


class DatasetMock(Dataset):
def download(self):
self._df = pd.DataFrame({'column_name': [1]})
return self._df

def _copyfrom(self, _):
return True

def _create_table(self, _):
return True

def _create_table_from_query(self):
return True

def exists(self):
return False

def _get_dataset_info(self):
return DatasetInfoMock(self._cc, self._table_name)
2 changes: 1 addition & 1 deletion test/test_context.py
@@ -24,7 +24,7 @@
 import IPython
 
 import cartoframes
-from cartoframes.dataset import Dataset
+from cartoframes.datasets import Dataset
 from cartoframes.columns import Column, normalize_name
 from cartoframes.utils import dict_items
 
69 changes: 68 additions & 1 deletion test/test_dataset.py
@@ -6,13 +6,16 @@
 import sys
 import json
 import warnings
+import pandas as pd
 
 from carto.exceptions import CartoException
 
 from cartoframes.context import CartoContext
-from cartoframes.dataset import Dataset, _decode_geom
+from cartoframes.datasets import Dataset, _decode_geom
 from cartoframes.columns import normalize_name
 from cartoframes.geojson import load_geojson
+from mocks.dataset_mock import DatasetMock
+from mocks.context_mock import ContextMock
 
 from utils import _UserUrlLoader
 
@@ -483,3 +486,67 @@ def assertNotExistsTable(self, table_name):
                 '''.format(table=table_name))
         except CartoException as e:
             self.assertTrue('relation "{}" does not exist'.format(table_name) in str(e))
+
+
+class TestDatasetInfo(unittest.TestCase):
+    def setUp(self):
+        self.username = 'fake_username'
+        self.api_key = 'fake_api_key'
+        self.context = ContextMock(username=self.username, api_key=self.api_key)
+
+    def test_dataset_get_privacy_from_new_table(self):
+        query = 'SELECT 1'
+        dataset = DatasetMock.from_query(query=query, context=self.context)
+        dataset.upload(table_name='fake_table')
+        self.assertEqual(dataset.get_dataset_info().privacy, Dataset.PRIVATE)
+
+    def test_dataset_get_privacy_from_not_sync(self):
+        query = 'SELECT 1'
+        dataset = DatasetMock.from_query(query=query, context=self.context)
+        error_msg = ('Your data is not synchronized with CARTO.'
+                     'First of all, you should call upload method to save your data in CARTO.')
+        with self.assertRaises(CartoException, msg=error_msg):
+            dataset.get_dataset_info()
+
+    def test_dataset_set_privacy_to_new_table(self):
+        query = 'SELECT 1'
+        dataset = DatasetMock.from_query(query=query, context=self.context)
+        dataset.upload(table_name='fake_table')
+        dataset.set_dataset_info(privacy=Dataset.PUBLIC)
+        self.assertEqual(dataset.get_dataset_info().privacy, Dataset.PUBLIC)
+
+    def test_dataset_set_privacy_with_wrong_parameter(self):
+        query = 'SELECT 1'
+        dataset = DatasetMock.from_query(query=query, context=self.context)
+        dataset.upload(table_name='fake_table')
+        wrong_privacy = 'wrong_privacy'
+        error_msg = 'Wrong privacy. The privacy: {p} is not valid. You can use: {o1}, {o2}, {o3}'.format(
+            p=wrong_privacy, o1=Dataset.PRIVATE, o2=Dataset.PUBLIC, o3=Dataset.LINK)
+        with self.assertRaises(ValueError, msg=error_msg):
+            dataset.set_dataset_info(privacy=wrong_privacy)
+
+    def test_dataset_info_should_work_from_table(self):
+        table_name = 'fake_table'
+        dataset = DatasetMock.from_table(table_name=table_name, context=self.context)
+        dataset.download()
+        self.assertEqual(dataset.get_dataset_info().privacy, Dataset.PRIVATE)
+
+    def test_dataset_info_should_fail_after_unsync_by_set_dataframe(self):
+        table_name = 'fake_table'
+        dataset = DatasetMock.from_table(table_name=table_name, context=self.context)
+        dataset.download()
+        dataset.set_dataframe(pd.DataFrame({'column_name': [2]}))
+        error_msg = ('Your data is not synchronized with CARTO.'
+                     'First of all, you should call upload method to save your data in CARTO.')
+        with self.assertRaises(CartoException, msg=error_msg):
+            dataset.get_dataset_info()
+
+    def test_dataset_info_should_fail_after_unsync_by_set_geodataframe(self):
+        table_name = 'fake_table'
+        dataset = DatasetMock.from_table(table_name=table_name, context=self.context)
+        dataset.download()
+        dataset.set_geodataframe(pd.DataFrame({'column_name': [2]}))
+        error_msg = ('Your data is not synchronized with CARTO.'
+                     'First of all, you should call upload method to save your data in CARTO.')
+        with self.assertRaises(CartoException, msg=error_msg):
+            dataset.get_dataset_info()