From 1d8d81d00ec49651f325cffc9963fe7f42c46f17 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 14 Dec 2018 12:23:58 -0500 Subject: [PATCH 1/6] fixes case where dataframe is subset of util cols --- cartoframes/context.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cartoframes/context.py b/cartoframes/context.py index 48669b687..6383526b4 100644 --- a/cartoframes/context.py +++ b/cartoframes/context.py @@ -2016,11 +2016,15 @@ def _pg2dtypes(pgtype): def _df2pg_schema(dataframe, pgcolnames): """Print column names with PostgreSQL schema for the SELECT statement of a SQL query""" + util_cols = set(('the_geom', 'the_geom_webmercator', 'cartodb_id')) + if set(dataframe.columns).issubset(util_cols): + print(f'subset: {", ".join(dataframe.columns)}') + return ', '.join(dataframe.columns) schema = ', '.join([ 'NULLIF("{col}", \'\')::{t} AS {col}'.format(col=c, t=_dtypes2pg(t)) for c, t in zip(pgcolnames, dataframe.dtypes) - if c not in ('the_geom', 'the_geom_webmercator', 'cartodb_id')]) + if c not in util_cols]) if 'the_geom' in pgcolnames: return '"the_geom", ' + schema return schema From 7faeb51d4852eefb50c28dbb57270d4aace450fc Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 14 Dec 2018 12:35:49 -0500 Subject: [PATCH 2/6] removes print with f-string --- cartoframes/context.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cartoframes/context.py b/cartoframes/context.py index 6383526b4..a806a3166 100644 --- a/cartoframes/context.py +++ b/cartoframes/context.py @@ -2018,7 +2018,6 @@ def _df2pg_schema(dataframe, pgcolnames): a SQL query""" util_cols = set(('the_geom', 'the_geom_webmercator', 'cartodb_id')) if set(dataframe.columns).issubset(util_cols): - print(f'subset: {", ".join(dataframe.columns)}') return ', '.join(dataframe.columns) schema = ', '.join([ 'NULLIF("{col}", \'\')::{t} AS {col}'.format(col=c, From d63aead127ae936cb816920a2e4f789dcbbfbddb Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Dec 2018 08:14:44 -0500 Subject: [PATCH 3/6] moves util-like functions to utils module --- cartoframes/context.py | 47 +++--------------------------------------- cartoframes/utils.py | 44 +++++++++++++++++++++++++++++++++++++++ test/test_context.py | 31 ---------------------------- test/test_utils.py | 32 ++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 75 deletions(-) diff --git a/cartoframes/context.py b/cartoframes/context.py index a806a3166..fc69919d2 100644 --- a/cartoframes/context.py +++ b/cartoframes/context.py @@ -520,7 +520,7 @@ def _send_batches(self, df, table_name, temp_dir, geom_col, pgcolnames, # combine chunks into final table try: select_base = 'SELECT {schema} FROM "{{table}}"'.format( - schema=_df2pg_schema(df, pgcolnames)) + schema=utils.df2pg_schema(df, pgcolnames)) unioned_tables = '\nUNION ALL\n'.join([select_base.format(table=t) for t in subtables]) self._debug_print(unioned=unioned_tables) @@ -651,7 +651,7 @@ def _set_schema(self, dataframe, table_name, pgcolnames): 'NULLIF("{col}", \'\')::{ctype}') # alter non-util columns that are not type text alter_cols = ', '.join(alter_temp.format(col=c, - ctype=_dtypes2pg(t)) + ctype=utils.dtypes2pg(t)) for c, t in zip(pgcolnames, dataframe.dtypes) if c not in util_cols and t != 'object') @@ -1926,6 +1926,7 @@ def _debug_print(self, **kwargs): value=str_value)) +# TODO: move all of the below to the utils module def _add_encoded_geom(df, geom_col): """Add encoded geometry to DataFrame""" # None if not a GeoDataFrame @@ -1985,45 +1986,3 @@ def _decode_geom(ewkb): if ewkb: return wkb.loads(ba.unhexlify(ewkb)) return None - - -def _dtypes2pg(dtype): - """Returns equivalent PostgreSQL type for input `dtype`""" - mapping = { - 'float64': 'numeric', - 'int64': 'numeric', - 'float32': 'numeric', - 'int32': 'numeric', - 'object': 'text', - 'bool': 'boolean', - 'datetime64[ns]': 'timestamp', - } - return mapping.get(str(dtype), 'text') - - -def _pg2dtypes(pgtype): - """Returns equivalent dtype for input `pgtype`.""" - mapping = { - 'date': 'datetime64[ns]', - 'number': 'float64', - 'string': 'object', - 'boolean': 'bool', - 'geometry': 'object', - } - return mapping.get(str(pgtype), 'object') - - -def _df2pg_schema(dataframe, pgcolnames): - """Print column names with PostgreSQL schema for the SELECT statement of - a SQL query""" - util_cols = set(('the_geom', 'the_geom_webmercator', 'cartodb_id')) - if set(dataframe.columns).issubset(util_cols): - return ', '.join(dataframe.columns) - schema = ', '.join([ - 'NULLIF("{col}", \'\')::{t} AS {col}'.format(col=c, - t=_dtypes2pg(t)) - for c, t in zip(pgcolnames, dataframe.dtypes) - if c not in util_cols]) - if 'the_geom' in pgcolnames: - return '"the_geom", ' + schema - return schema diff --git a/cartoframes/utils.py b/cartoframes/utils.py index fa884cdfd..e332bd33c 100644 --- a/cartoframes/utils.py +++ b/cartoframes/utils.py @@ -128,3 +128,47 @@ def wrapper(*args, **kwargs): evaled_func = func(*args, **kwargs) return evaled_func return wrapper + + +## schema definition functions +def dtypes2pg(dtype): + """Returns equivalent PostgreSQL type for input `dtype`""" + mapping = { + 'float64': 'numeric', + 'int64': 'numeric', + 'float32': 'numeric', + 'int32': 'numeric', + 'object': 'text', + 'bool': 'boolean', + 'datetime64[ns]': 'timestamp', + } + return mapping.get(str(dtype), 'text') + + +# NOTE: this is not currently used anywhere +def _pg2dtypes(pgtype): + """Returns equivalent dtype for input `pgtype`.""" + mapping = { + 'date': 'datetime64[ns]', + 'number': 'float64', + 'string': 'object', + 'boolean': 'bool', + 'geometry': 'object', + } + return mapping.get(str(pgtype), 'object') + + +def df2pg_schema(dataframe, pgcolnames): + """Print column names with PostgreSQL schema for the SELECT statement of + a SQL query""" + util_cols = set(('the_geom', 'the_geom_webmercator', 'cartodb_id')) + if set(dataframe.columns).issubset(util_cols): + return ', '.join(dataframe.columns) + schema = ', '.join([ + 'NULLIF("{col}", \'\')::{t} AS {col}'.format(col=c, + t=_dtypes2pg(t)) + for c, t in zip(pgcolnames, dataframe.dtypes) + if c not in util_cols]) + if 'the_geom' in pgcolnames: + return '"the_geom", ' + schema + return schema diff --git a/test/test_context.py b/test/test_context.py index 3360927e5..8aed459c5 100644 --- a/test/test_context.py +++ b/test/test_context.py @@ -875,37 +875,6 @@ def test_encode_geom(self): self.assertEqual(ewkb_resp, ewkb) self.assertIsNone(_encode_geom(None)) - def test_dtypes2pg(self): - """context._dtypes2pg""" - from cartoframes.context import _dtypes2pg - results = { - 'float64': 'numeric', - 'int64': 'numeric', - 'float32': 'numeric', - 'int32': 'numeric', - 'object': 'text', - 'bool': 'boolean', - 'datetime64[ns]': 'timestamp', - 'unknown_dtype': 'text' - } - for i in results: - self.assertEqual(_dtypes2pg(i), results[i]) - - def test_pg2dtypes(self): - """context._pg2dtypes""" - from cartoframes.context import _pg2dtypes - results = { - 'date': 'datetime64[ns]', - 'number': 'float64', - 'string': 'object', - 'boolean': 'bool', - 'geometry': 'object', - 'unknown_pgdata': 'object' - } - for i in results: - result = _pg2dtypes(i) - self.assertEqual(result, results[i]) - def test_debug_print(self): """context._debug_print""" cc = cartoframes.CartoContext(base_url=self.baseurl, diff --git a/test/test_utils.py b/test/test_utils.py index af4db3841..4d0a4c7c2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -139,3 +139,35 @@ def test_importify_params(self): ans = ('true', 'false', 'true', 'gulab jamon', ) for idx, p in enumerate(params): self.assertTrue(importify_params(p), ans[idx]) + + def test_dtypes2pg(self): + """utils.dtypes2pg""" + from cartoframes.utils import dtypes2pg + results = { + 'float64': 'numeric', + 'int64': 'numeric', + 'float32': 'numeric', + 'int32': 'numeric', + 'object': 'text', + 'bool': 'boolean', + 'datetime64[ns]': 'timestamp', + 'unknown_dtype': 'text' + } + for i in results: + self.assertEqual(dtypes2pg(i), results[i]) + + def test_pg2dtypes(self): + """context._pg2dtypes""" + from cartoframes.utils import pg2dtypes + results = { + 'date': 'datetime64[ns]', + 'number': 'float64', + 'string': 'object', + 'boolean': 'bool', + 'geometry': 'object', + 'unknown_pgdata': 'object' + } + for i in results: + result = pg2dtypes(i) + self.assertEqual(result, results[i]) + From 1d2682e7ec76c69bcad38bc3e21d5415aa01e499 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Dec 2018 08:16:22 -0500 Subject: [PATCH 4/6] adds more ignores --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 1c419964e..963aa6105 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ wheels/ *.egg-info/ .installed.cfg *.egg +Pipfile +Pipfile.lock # Swap files .*.sw[nop] @@ -40,3 +42,4 @@ wheels/ CARTOCREDS.json SITEKEY.txt test/secret.json +examples/scratch/* From 5659c75d10c18d4f956d3cdc84c2fcc4b31b48da Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Dec 2018 08:29:42 -0500 Subject: [PATCH 5/6] fixes misnamed function --- cartoframes/utils.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/cartoframes/utils.py b/cartoframes/utils.py index e332bd33c..8a2262aa3 100644 --- a/cartoframes/utils.py +++ b/cartoframes/utils.py @@ -1,17 +1,24 @@ +"""general utility functions""" import sys -from tqdm import tqdm from functools import wraps from warnings import filterwarnings, catch_warnings +from tqdm import tqdm + -def dict_items(d): +def dict_items(indict): + """function for iterating through dict items compatible with py2 and 3 + + Args: + indict (dict): Dictionary that will be turned into items iterator + """ if sys.version_info >= (3, 0): - return d.items() - else: - return d.iteritems() + return indict.items() + return indict.iteritems() def cssify(css_dict): + """Function to get CartoCSS from Python dicts""" css = '' for key, value in dict_items(css_dict): css += '{key} {{ '.format(key=key) @@ -61,9 +68,9 @@ def norm_colname(colname): """ last_char_special = False char_list = [] - for e in str(colname): - if e.isalnum(): - char_list.append(e.lower()) + for colchar in str(colname): + if colchar.isalnum(): + char_list.append(colchar.lower()) last_char_special = False else: if not last_char_special: @@ -130,7 +137,7 @@ def wrapper(*args, **kwargs): return wrapper -## schema definition functions +# schema definition functions def dtypes2pg(dtype): """Returns equivalent PostgreSQL type for input `dtype`""" mapping = { @@ -146,7 +153,7 @@ def dtypes2pg(dtype): # NOTE: this is not currently used anywhere -def _pg2dtypes(pgtype): +def pg2dtypes(pgtype): """Returns equivalent dtype for input `pgtype`.""" mapping = { 'date': 'datetime64[ns]', @@ -166,7 +173,7 @@ def df2pg_schema(dataframe, pgcolnames): return ', '.join(dataframe.columns) schema = ', '.join([ 'NULLIF("{col}", \'\')::{t} AS {col}'.format(col=c, - t=_dtypes2pg(t)) + t=dtypes2pg(t)) for c, t in zip(pgcolnames, dataframe.dtypes) if c not in util_cols]) if 'the_geom' in pgcolnames: From d3205ce873faf15681447e8907a43f4348b70de0 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 18 Dec 2018 08:56:12 -0500 Subject: [PATCH 6/6] moves over test for schema --- test/test_context.py | 25 ------------------------- test/test_utils.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/test/test_context.py b/test/test_context.py index 8aed459c5..546007bac 100644 --- a/test/test_context.py +++ b/test/test_context.py @@ -789,31 +789,6 @@ def test_cartocontext_check_query(self): with self.assertRaises(ValueError): cc._check_query(success_query, style_cols=fail_cols) - def test_df2pg_schema(self): - """context._df2pg_schema""" - from cartoframes.context import _df2pg_schema - data = [{'id': 'a', 'val': 1.1, 'truth': True, 'idnum': 1}, - {'id': 'b', 'val': 2.2, 'truth': True, 'idnum': 2}, - {'id': 'c', 'val': 3.3, 'truth': False, 'idnum': 3}] - df = pd.DataFrame(data).astype({'id': 'object', - 'val': float, - 'truth': bool, - 'idnum': int}) - # specify order of columns - df = df[['id', 'val', 'truth', 'idnum']] - pgcols = ['id', 'val', 'truth', 'idnum'] - ans = ('NULLIF("id", \'\')::text AS id, ' - 'NULLIF("val", \'\')::numeric AS val, ' - 'NULLIF("truth", \'\')::boolean AS truth, ' - 'NULLIF("idnum", \'\')::numeric AS idnum') - - self.assertEqual(ans, _df2pg_schema(df, pgcols)) - - # add the_geom - df['the_geom'] = 'Point(0 0)' - ans = '\"the_geom\", ' + ans - pgcols.append('the_geom') - self.assertEqual(ans, _df2pg_schema(df, pgcols)) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_add_encoded_geom(self): diff --git a/test/test_utils.py b/test/test_utils.py index 4d0a4c7c2..a063eeac1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,8 +1,11 @@ """Unit tests for cartoframes.utils""" import unittest +from collections import OrderedDict + +import pandas as pd + from cartoframes.utils import (dict_items, cssify, norm_colname, normalize_colnames, importify_params) -from collections import OrderedDict class TestUtils(unittest.TestCase): @@ -171,3 +174,28 @@ def test_pg2dtypes(self): result = pg2dtypes(i) self.assertEqual(result, results[i]) + def test_df2pg_schema(self): + """utils.df2pg_schema""" + from cartoframes.utils import df2pg_schema + data = [{'id': 'a', 'val': 1.1, 'truth': True, 'idnum': 1}, + {'id': 'b', 'val': 2.2, 'truth': True, 'idnum': 2}, + {'id': 'c', 'val': 3.3, 'truth': False, 'idnum': 3}] + df = pd.DataFrame(data).astype({'id': 'object', + 'val': float, + 'truth': bool, + 'idnum': int}) + # specify order of columns + df = df[['id', 'val', 'truth', 'idnum']] + pgcols = ['id', 'val', 'truth', 'idnum'] + ans = ('NULLIF("id", \'\')::text AS id, ' + 'NULLIF("val", \'\')::numeric AS val, ' + 'NULLIF("truth", \'\')::boolean AS truth, ' + 'NULLIF("idnum", \'\')::numeric AS idnum') + + self.assertEqual(ans, df2pg_schema(df, pgcols)) + + # add the_geom + df['the_geom'] = 'Point(0 0)' + ans = '\"the_geom\", ' + ans + pgcols.append('the_geom') + self.assertEqual(ans, df2pg_schema(df, pgcols))