Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download #1050

Merged
merged 38 commits into from
Oct 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
e577715
first PoC
simon-contreras-deel Oct 2, 2019
22f0e9f
download by query
simon-contreras-deel Oct 3, 2019
e6e4a83
download as a dataframe
simon-contreras-deel Oct 3, 2019
5e118c0
storage api version
simon-contreras-deel Oct 3, 2019
bc142c4
download file
simon-contreras-deel Oct 3, 2019
afa0a43
download default file
simon-contreras-deel Oct 3, 2019
0885cc6
using csv module
simon-contreras-deel Oct 7, 2019
c87d8f8
progress_bar
simon-contreras-deel Oct 7, 2019
6aab3bc
fail if file exists
simon-contreras-deel Oct 7, 2019
cd4b85b
fail if exists param
simon-contreras-deel Oct 7, 2019
52f6eed
removing dataframe and storage api methods
simon-contreras-deel Oct 7, 2019
a99ab80
using self.query
simon-contreras-deel Oct 7, 2019
14a6b21
testing download
simon-contreras-deel Oct 7, 2019
1b5b274
updating notebook
simon-contreras-deel Oct 7, 2019
7fbcc00
download_to_file
simon-contreras-deel Oct 7, 2019
5a9385b
Merge branch 'develop' into feature/do-bq-download
simon-contreras-deel Oct 7, 2019
22052b1
Merge branch 'develop' into feature/do-bq-download
simon-contreras-deel Oct 7, 2019
347a770
progress bar param
simon-contreras-deel Oct 7, 2019
0bb9c35
return path instead of warn
simon-contreras-deel Oct 8, 2019
3f7f575
dataset download
simon-contreras-deel Oct 8, 2019
4d7e705
geography download
simon-contreras-deel Oct 8, 2019
cc7f513
updating notebook
simon-contreras-deel Oct 8, 2019
25f3c5d
basic dataset test
simon-contreras-deel Oct 8, 2019
8b60a43
basic geography test
simon-contreras-deel Oct 8, 2019
5be6f74
detail in test fixtures
simon-contreras-deel Oct 8, 2019
4832fa2
add column names in csv file
simon-contreras-deel Oct 8, 2019
09d64f4
ensuring csv header test
simon-contreras-deel Oct 8, 2019
de769d6
updating notebook
simon-contreras-deel Oct 8, 2019
5106dd1
refactoring download code into entity
simon-contreras-deel Oct 8, 2019
a561a8e
download docs
simon-contreras-deel Oct 8, 2019
a6bdbf0
typo
simon-contreras-deel Oct 8, 2019
2dbe2b3
Improve message
simon-contreras-deel Oct 8, 2019
01f1907
remove BQ client from notebook
simon-contreras-deel Oct 8, 2019
b8562bf
get_do_dataset method
simon-contreras-deel Oct 8, 2019
0d82706
get_do_dataset def and avoid warnings from carto-python
simon-contreras-deel Oct 8, 2019
c43c434
unify mocks
simon-contreras-deel Oct 8, 2019
1256e45
remove ds references
simon-contreras-deel Oct 9, 2019
daeb73c
changelog
simon-contreras-deel Oct 9, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## Changed
- Remove pandas extension in catalog classes (#1038)
- Download dataset and geographies (#1050)

## [1.0b3] - 2019-08-27
### Added
Expand Down Expand Up @@ -282,4 +283,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Adds a compression option for write operations

### Fixed
- Fixes file system path creation to be generic to OS
- Fixes file system path creation to be generic to OS
6 changes: 6 additions & 0 deletions cartoframes/auth/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@

from ..__version__ import __version__

from warnings import filterwarnings
# Silence FutureWarnings raised by the carto-python client library so that
# cartoframes users are not spammed with upstream deprecation noise.
# NOTE(review): per PR commit "avoid warnings from carto-python".
filterwarnings("ignore", category=FutureWarning, module="carto")

if sys.version_info >= (3, 0):
from urllib.parse import urlparse
else:
Expand Down Expand Up @@ -208,6 +211,9 @@ def get_do_token(self):

return token.access_token

def get_do_dataset(self):
    """Return the user's Data Observatory dataset identifier.

    BigQuery dataset names cannot contain hyphens, so any hyphen in the
    CARTO username is replaced with an underscore.
    """
    username = self._username
    return username.replace('-', '_')

def get_api_key_auth_client(self):
if not self._api_key_auth_client:
self._api_key_auth_client = APIKeyAuthClient(
Expand Down
59 changes: 57 additions & 2 deletions cartoframes/data/clients/bigquery_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from __future__ import absolute_import

import os
import appdirs
import csv
import tqdm

from google.cloud import bigquery
from google.oauth2.credentials import Credentials as GoogleCredentials
from google.auth.exceptions import RefreshError
Expand All @@ -8,6 +13,8 @@

from ...auth import get_default_credentials

_USER_CONFIG_DIR = appdirs.user_config_dir('cartoframes')


def refresh_client(func):
def wrapper(self, *args, **kwargs):
Expand Down Expand Up @@ -52,6 +59,54 @@ def upload_dataframe(self, dataframe, schema, tablename, project, dataset):

@refresh_client
def query(self, query, **kwargs):
    """Run *query* on BigQuery and return the resulting query job.

    Args:
        query (str): SQL query to execute.
        **kwargs: extra keyword arguments forwarded to
            ``google.cloud.bigquery.Client.query``.

    Returns:
        The query job object returned by the underlying client.
    """
    # Single call to the client: the previous version kept a dead
    # ``response`` local alongside a second, returned call.
    return self.client.query(query, **kwargs)

@refresh_client
def get_table(self, project, dataset, table):
    """Fetch the metadata of ``project.dataset.table`` from BigQuery."""
    parts = (project, dataset, table)
    full_table_name = '{}.{}.{}'.format(*parts)
    return self.client.get_table(full_table_name)

def get_table_column_names(self, project, dataset, table):
    """Return the column names of a BigQuery table, in schema order."""
    table_info = self.get_table(project, dataset, table)
    names = []
    for field in table_info.schema:
        names.append(field.name)
    return names

def download_to_file(self, project, dataset, table, limit=None, offset=None,
                     file_path=None, fail_if_exists=False, progress_bar=True):
    """Download a BigQuery table into a local CSV file.

    Args:
        project (str): BigQuery project of the source table.
        dataset (str): BigQuery dataset of the source table.
        table (str): name of the source table.
        limit (int, optional): maximum number of rows to download.
        offset (int, optional): number of rows to skip.
        file_path (str, optional): destination path. Defaults to
            ``<user config dir>/<project>.<dataset>.<table>.csv``.
        fail_if_exists (bool, optional): raise instead of overwriting an
            existing file. Defaults to False.
        progress_bar (bool, optional): display a tqdm notebook progress
            bar while downloading. Defaults to True.

    Returns:
        str: path of the written CSV file.

    Raises:
        CartoException: if ``fail_if_exists`` is True and the destination
            file already exists.
    """
    if not file_path:
        file_name = '{}.{}.{}.csv'.format(project, dataset, table)
        # Fix: the config directory may not exist on first use; create it
        # before trying to write into it.
        if not os.path.exists(_USER_CONFIG_DIR):
            os.makedirs(_USER_CONFIG_DIR)
        file_path = os.path.join(_USER_CONFIG_DIR, file_name)

    if fail_if_exists and os.path.isfile(file_path):
        raise CartoException('The file `{}` already exists.'.format(file_path))

    column_names = self.get_table_column_names(project, dataset, table)

    query = _download_query(project, dataset, table, limit, offset)
    rows_iter = self.query(query).result()

    pb = None
    if progress_bar:
        pb = tqdm.tqdm_notebook(total=rows_iter.total_rows)

    with open(file_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile)

        # First row of the CSV carries the column names.
        csvwriter.writerow(column_names)

        for row in rows_iter:
            csvwriter.writerow(row.values())
            if pb is not None:
                pb.update(1)

    # Fix: close the progress-bar widget instead of leaking it.
    if pb is not None:
        pb.close()

    return file_path


def _download_query(project, dataset, table, limit=None, offset=None):
    """Build the ``SELECT *`` statement used to download a table.

    Args:
        project (str): BigQuery project of the table.
        dataset (str): BigQuery dataset of the table.
        table (str): table name.
        limit (int, optional): appended as a ``LIMIT`` clause when truthy.
        offset (int, optional): appended as an ``OFFSET`` clause when truthy.

    Returns:
        str: the SQL query, with the table name backtick-quoted.
    """
    full_table_name = '`{}.{}.{}`'.format(project, dataset, table)
    query = 'SELECT * FROM {}'.format(full_table_name)

    if limit:
        query += ' LIMIT {}'.format(limit)
    if offset:
        query += ' OFFSET {}'.format(offset)

    return query
2 changes: 1 addition & 1 deletion cartoframes/data/enrichment/enrichment_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

def enrich(query_function, **kwargs):
credentials = _get_credentials(kwargs['credentials'])
user_dataset = credentials.username.replace('-', '_')
user_dataset = credentials.get_do_dataset()
bq_client = _get_bigquery_client(_WORKING_PROJECT, credentials)

data_copy = _prepare_data(kwargs['data'], kwargs['data_geom_column'])
Expand Down
13 changes: 13 additions & 0 deletions cartoframes/data/observatory/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,16 @@ def is_public_data(self):
@property
def summary(self):
return self.data['summary_jsonb']

def download(self, credentials=None):
    """Download the data of this Dataset into a local CSV file.

    Args:
        credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`, optional):
            CARTO user account credentials. When omitted, the default
            credentials (registered via :py:meth:`set_default_credentials
            <cartoframes.auth.set_default_credentials>`) are used instead.
    """

    return self._download(credentials)
36 changes: 36 additions & 0 deletions cartoframes/data/observatory/entity.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
import pandas as pd
from warnings import warn

from google.api_core.exceptions import NotFound

from carto.exceptions import CartoException

from ..clients.bigquery_client import BigQueryClient
from ...auth import get_default_credentials

# ``abc.ABC`` exists only on Python 3; on Python 2 build an equivalent
# abstract base class from ``ABCMeta`` so the same ``ABC`` name works
# in both interpreter versions.
try:
    from abc import ABC, abstractmethod
except ImportError:
    from abc import ABCMeta, abstractmethod
    ABC = ABCMeta('ABC', (object,), {'__slots__': ()})

# BigQuery project that hosts the per-user views of purchased DO data
# (see ``CatalogEntity._download`` below).
_WORKING_PROJECT = 'carto-do-customers'


class CatalogEntity(ABC):

Expand Down Expand Up @@ -45,6 +55,32 @@ def __str__(self):
def __repr__(self):
    """Unambiguous representation: ``ClassName(entity_id)``."""
    classname = self.__class__.__name__
    return '{classname}({entity_id})'.format(classname=classname,
                                             entity_id=self.id)

def _download(self, credentials=None):
    """Download this entity's table to a local CSV file.

    Purchased datasets are exposed in the user's own BigQuery dataset
    inside the working project as views named ``view_<dataset>_<table>``.
    Raises ``CartoException`` when no such view exists (i.e. the dataset
    has not been purchased).
    """
    creds = _get_credentials(credentials)
    bq_client = _get_bigquery_client(_WORKING_PROJECT, creds)
    user_dataset = creds.get_do_dataset()

    project, dataset, table = self.id.split('.')
    view = 'view_{}_{}'.format(dataset.replace('-', '_'), table)

    try:
        file_path = bq_client.download_to_file(_WORKING_PROJECT, user_dataset, view)
    except NotFound:
        raise CartoException('You have not purchased the dataset `{}` yet'.format(self.id))

    warn('Data saved: {}.'.format(file_path))
    warn("To read it you can do: `pandas.read_csv('{}')`.".format(file_path))

    return file_path


def _get_credentials(credentials=None):
    """Return *credentials* when truthy, otherwise the default credentials."""
    if credentials:
        return credentials
    return get_default_credentials()


def _get_bigquery_client(project, credentials):
    """Build a :class:`BigQueryClient` for *project* with *credentials*."""
    client = BigQueryClient(project, credentials)
    return client


class CatalogList(list):

Expand Down
13 changes: 13 additions & 0 deletions cartoframes/data/observatory/geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,16 @@ def is_public_data(self):
@property
def summary(self):
return self.data['summary_jsonb']

def download(self, credentials=None):
    """Download Geography data.

    Args:
        credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`, optional):
            credentials of CARTO user account. If not provided,
            a default credentials (if set with :py:meth:`set_default_credentials
            <cartoframes.auth.set_default_credentials>`) will be attempted to be
            used.
    """

    return self._download(credentials)
137 changes: 137 additions & 0 deletions examples/08_data_observatory/download.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.auth import Credentials\n",
"credentials = Credentials.from_file()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Catalog Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data.observatory.catalog import Catalog\n",
"dataset = Catalog().categories.get('financial').datasets.get('{dataset_id}')\n",
"dataset.to_series()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset.download(credentials)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Catalog Geography "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data.observatory.geography import Geography\n",
"geography = Geography.get(dataset.geography)\n",
"geography.to_series()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"file_path = geography.download(credentials)\n",
"file_path"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Upload downloaded csv file to CARTO "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(file_path)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data import Dataset\n",
"\n",
"Dataset(df).upload(table_name='test_do_geography', credentials=credentials, if_exists='replace')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Visualize it"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.viz import Map, Layer\n",
"Map(Layer('test_do_geography', credentials=credentials))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading