CartoDB · andy-esch · Dec 18, 2018 · Dec 14, 2018 · Dec 14, 2018 · Dec 18, 2018
diff --git a/.gitignore b/.gitignore
@@ -32,6 +32,8 @@ wheels/
 *.egg-info/
 .installed.cfg
 *.egg
+Pipfile
+Pipfile.lock
 
 # Swap files
 .*.sw[nop]
@@ -40,3 +42,4 @@ wheels/
 CARTOCREDS.json
 SITEKEY.txt
 test/secret.json
+examples/scratch/*
diff --git a/cartoframes/context.py b/cartoframes/context.py
@@ -520,7 +520,7 @@ def _send_batches(self, df, table_name, temp_dir, geom_col, pgcolnames,
         # combine chunks into final table
         try:
             select_base = 'SELECT {schema} FROM "{{table}}"'.format(
-                schema=_df2pg_schema(df, pgcolnames))
+                schema=utils.df2pg_schema(df, pgcolnames))
             unioned_tables = '\nUNION ALL\n'.join([select_base.format(table=t)
                                                    for t in subtables])
             self._debug_print(unioned=unioned_tables)
@@ -651,7 +651,7 @@ def _set_schema(self, dataframe, table_name, pgcolnames):
                       'NULLIF("{col}", \'\')::{ctype}')
         # alter non-util columns that are not type text
         alter_cols = ', '.join(alter_temp.format(col=c,
-                                                 ctype=_dtypes2pg(t))
+                                                 ctype=utils.dtypes2pg(t))
                                for c, t in zip(pgcolnames,
                                                dataframe.dtypes)
                                if c not in util_cols and t != 'object')
@@ -1926,6 +1926,7 @@ def _debug_print(self, **kwargs):
                                           value=str_value))
 
 
+# TODO: move all of the below to the utils module
 def _add_encoded_geom(df, geom_col):
     """Add encoded geometry to DataFrame"""
     # None if not a GeoDataFrame
@@ -1985,42 +1986,3 @@ def _decode_geom(ewkb):
     if ewkb:
         return wkb.loads(ba.unhexlify(ewkb))
     return None
-
-
-def _dtypes2pg(dtype):
-    """Returns equivalent PostgreSQL type for input `dtype`"""
-    mapping = {
-        'float64': 'numeric',
-        'int64': 'numeric',
-        'float32': 'numeric',
-        'int32': 'numeric',
-        'object': 'text',
-        'bool': 'boolean',
-        'datetime64[ns]': 'timestamp',
-    }
-    return mapping.get(str(dtype), 'text')
-
-
-def _pg2dtypes(pgtype):
-    """Returns equivalent dtype for input `pgtype`."""
-    mapping = {
-        'date': 'datetime64[ns]',
-        'number': 'float64',
-        'string': 'object',
-        'boolean': 'bool',
-        'geometry': 'object',
-    }
-    return mapping.get(str(pgtype), 'object')
-
-
-def _df2pg_schema(dataframe, pgcolnames):
-    """Print column names with PostgreSQL schema for the SELECT statement of
-    a SQL query"""
-    schema = ', '.join([
-        'NULLIF("{col}", \'\')::{t} AS {col}'.format(col=c,
-                                                     t=_dtypes2pg(t))
-        for c, t in zip(pgcolnames, dataframe.dtypes)
-        if c not in ('the_geom', 'the_geom_webmercator', 'cartodb_id')])
-    if 'the_geom' in pgcolnames:
-        return '"the_geom", ' + schema
-    return schema
diff --git a/cartoframes/utils.py b/cartoframes/utils.py
@@ -1,17 +1,24 @@
+"""general utility functions"""
 import sys
-from tqdm import tqdm
 from functools import wraps
 from warnings import filterwarnings, catch_warnings
 
+from tqdm import tqdm
+
+
+def dict_items(indict):
+    """function for iterating through dict items compatible with py2 and 3
 
-def dict_items(d):
+    Args:
+        indict (dict): Dictionary that will be turned into items iterator
+    """
     if sys.version_info >= (3, 0):
-        return d.items()
-    else:
-        return d.iteritems()
+        return indict.items()
+    return indict.iteritems()
 
 
 def cssify(css_dict):
+    """Function to get CartoCSS from Python dicts"""
     css = ''
     for key, value in dict_items(css_dict):
         css += '{key} {{ '.format(key=key)
@@ -61,9 +68,9 @@ def norm_colname(colname):
     """
     last_char_special = False
     char_list = []
-    for e in str(colname):
-        if e.isalnum():
-            char_list.append(e.lower())
+    for colchar in str(colname):
+        if colchar.isalnum():
+            char_list.append(colchar.lower())
             last_char_special = False
         else:
             if not last_char_special:
@@ -128,3 +135,47 @@ def wrapper(*args, **kwargs):
             evaled_func = func(*args, **kwargs)
         return evaled_func
     return wrapper
+
+
+# schema definition functions
+def dtypes2pg(dtype):
+    """Returns equivalent PostgreSQL type for input `dtype`"""
+    mapping = {
+        'float64': 'numeric',
+        'int64': 'numeric',
+        'float32': 'numeric',
+        'int32': 'numeric',
+        'object': 'text',
+        'bool': 'boolean',
+        'datetime64[ns]': 'timestamp',
+    }
+    return mapping.get(str(dtype), 'text')
+
+
+# NOTE: this is not currently used anywhere
+def pg2dtypes(pgtype):
+    """Returns equivalent dtype for input `pgtype`."""
+    mapping = {
+        'date': 'datetime64[ns]',
+        'number': 'float64',
+        'string': 'object',
+        'boolean': 'bool',
+        'geometry': 'object',
+    }
+    return mapping.get(str(pgtype), 'object')
+
+
+def df2pg_schema(dataframe, pgcolnames):
+    """Return column names with PostgreSQL schema for the SELECT statement of
+    a SQL query"""
+    util_cols = set(('the_geom', 'the_geom_webmercator', 'cartodb_id'))
+    if set(dataframe.columns).issubset(util_cols):
+        return ', '.join(dataframe.columns)
+    schema = ', '.join([
+        'NULLIF("{col}", \'\')::{t} AS {col}'.format(col=c,
+                                                     t=dtypes2pg(t))
+        for c, t in zip(pgcolnames, dataframe.dtypes)
+        if c not in util_cols])
+    if 'the_geom' in pgcolnames:
+        return '"the_geom", ' + schema
+    return schema
diff --git a/test/test_context.py b/test/test_context.py
@@ -789,31 +789,6 @@ def test_cartocontext_check_query(self):
         with self.assertRaises(ValueError):
             cc._check_query(success_query, style_cols=fail_cols)
 
-    def test_df2pg_schema(self):
-        """context._df2pg_schema"""
-        from cartoframes.context import _df2pg_schema
-        data = [{'id': 'a', 'val': 1.1, 'truth': True, 'idnum': 1},
-                {'id': 'b', 'val': 2.2, 'truth': True, 'idnum': 2},
-                {'id': 'c', 'val': 3.3, 'truth': False, 'idnum': 3}]
-        df = pd.DataFrame(data).astype({'id': 'object',
-                                        'val': float,
-                                        'truth': bool,
-                                        'idnum': int})
-        # specify order of columns
-        df = df[['id', 'val', 'truth', 'idnum']]
-        pgcols = ['id', 'val', 'truth', 'idnum']
-        ans = ('NULLIF("id", \'\')::text AS id, '
-               'NULLIF("val", \'\')::numeric AS val, '
-               'NULLIF("truth", \'\')::boolean AS truth, '
-               'NULLIF("idnum", \'\')::numeric AS idnum')
-
-        self.assertEqual(ans, _df2pg_schema(df, pgcols))
-
-        # add the_geom
-        df['the_geom'] = 'Point(0 0)'
-        ans = '\"the_geom\", ' + ans
-        pgcols.append('the_geom')
-        self.assertEqual(ans, _df2pg_schema(df, pgcols))
 
     @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
     def test_add_encoded_geom(self):
@@ -875,37 +850,6 @@ def test_encode_geom(self):
         self.assertEqual(ewkb_resp, ewkb)
         self.assertIsNone(_encode_geom(None))
 
-    def test_dtypes2pg(self):
-        """context._dtypes2pg"""
-        from cartoframes.context import _dtypes2pg
-        results = {
-            'float64': 'numeric',
-            'int64': 'numeric',
-            'float32': 'numeric',
-            'int32': 'numeric',
-            'object': 'text',
-            'bool': 'boolean',
-            'datetime64[ns]': 'timestamp',
-            'unknown_dtype': 'text'
-        }
-        for i in results:
-            self.assertEqual(_dtypes2pg(i), results[i])
-
-    def test_pg2dtypes(self):
-        """context._pg2dtypes"""
-        from cartoframes.context import _pg2dtypes
-        results = {
-            'date': 'datetime64[ns]',
-            'number': 'float64',
-            'string': 'object',
-            'boolean': 'bool',
-            'geometry': 'object',
-            'unknown_pgdata': 'object'
-        }
-        for i in results:
-            result = _pg2dtypes(i)
-            self.assertEqual(result, results[i])
-
     def test_debug_print(self):
         """context._debug_print"""
         cc = cartoframes.CartoContext(base_url=self.baseurl,

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -1,8 +1,11 @@
 """Unit tests for cartoframes.utils"""
 import unittest
+from collections import OrderedDict
+
+import pandas as pd
+
 from cartoframes.utils import (dict_items, cssify, norm_colname,
                                normalize_colnames, importify_params)
-from collections import OrderedDict
 
 
 class TestUtils(unittest.TestCase):
@@ -139,3 +142,60 @@ def test_importify_params(self):
         ans = ('true', 'false', 'true', 'gulab jamon', )
         for idx, p in enumerate(params):
             self.assertTrue(importify_params(p), ans[idx])
+
+    def test_dtypes2pg(self):
+        """utils.dtypes2pg"""
+        from cartoframes.utils import dtypes2pg
+        results = {
+            'float64': 'numeric',
+            'int64': 'numeric',
+            'float32': 'numeric',
+            'int32': 'numeric',
+            'object': 'text',
+            'bool': 'boolean',
+            'datetime64[ns]': 'timestamp',
+            'unknown_dtype': 'text'
+        }
+        for i in results:
+            self.assertEqual(dtypes2pg(i), results[i])
+
+    def test_pg2dtypes(self):
+        """utils.pg2dtypes"""
+        from cartoframes.utils import pg2dtypes
+        results = {
+            'date': 'datetime64[ns]',
+            'number': 'float64',
+            'string': 'object',
+            'boolean': 'bool',
+            'geometry': 'object',
+            'unknown_pgdata': 'object'
+        }
+        for i in results:
+            result = pg2dtypes(i)
+            self.assertEqual(result, results[i])
+
+    def test_df2pg_schema(self):
+        """utils.df2pg_schema"""
+        from cartoframes.utils import df2pg_schema
+        data = [{'id': 'a', 'val': 1.1, 'truth': True, 'idnum': 1},
+                {'id': 'b', 'val': 2.2, 'truth': True, 'idnum': 2},
+                {'id': 'c', 'val': 3.3, 'truth': False, 'idnum': 3}]
+        df = pd.DataFrame(data).astype({'id': 'object',
+                                        'val': float,
+                                        'truth': bool,
+                                        'idnum': int})
+        # specify order of columns
+        df = df[['id', 'val', 'truth', 'idnum']]
+        pgcols = ['id', 'val', 'truth', 'idnum']
+        ans = ('NULLIF("id", \'\')::text AS id, '
+               'NULLIF("val", \'\')::numeric AS val, '
+               'NULLIF("truth", \'\')::boolean AS truth, '
+               'NULLIF("idnum", \'\')::numeric AS idnum')
+
+        self.assertEqual(ans, df2pg_schema(df, pgcols))
+
+        # add the_geom
+        df['the_geom'] = 'Point(0 0)'
+        ans = '\"the_geom\", ' + ans
+        pgcols.append('the_geom')
+        self.assertEqual(ans, df2pg_schema(df, pgcols))