From de6e6f0e374084c232e2892ede53edcafe7e1024 Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Mon, 7 Oct 2019 11:20:09 +0300 Subject: [PATCH 1/2] Postgres: make sure table from the public schema doesn't get merged with a table from another schema. --- redash/query_runner/pg.py | 38 ++++++++++++++++++++++++++--------- tests/query_runner/test_pg.py | 23 +++++++++++++++++++++ 2 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 tests/query_runner/test_pg.py diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index e9e4cc5431..e350e38bcc 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -63,6 +63,33 @@ def _wait(conn, timeout=None): raise psycopg2.OperationalError("select.error received") +def build_schema(query_result, schema): + # By default we omit the public schema name from the table name. But there are + # edge cases, where this might cause conflicts. For example: + # * We have a schema named "main" with table "users". + # * We have a table named "main.users" in the public schema. + # (while this feels unlikely, this actually happened) + # In this case if we omit the schema name for the public table, we will have + # a conflict. + + full_table_name = lambda r: u'{}.{}'.format(r['table_schema'], r['table_name']) + table_names = set(map(full_table_name, query_result['rows'])) + + for row in query_result['rows']: + if row['table_schema'] != 'public': + table_name = full_table_name(row) + else: + if row['table_name'] in table_names: + table_name = u'{}."{}"'.format(row['table_schema'], row['table_name']) + else: + table_name = row['table_name'] + + if table_name not in schema: + schema[table_name] = {'name': table_name, 'columns': []} + + schema[table_name]['columns'].append(row['column_name']) + + class PostgreSQL(BaseSQLQueryRunner): noop_query = "SELECT 1" @@ -112,17 +139,8 @@ def _get_definitions(self, schema, query): results = json_loads(results) - for row in results['rows']: - if row['table_schema'] != 'public': - table_name = u'{}.{}'.format(row['table_schema'], - row['table_name']) - else: - table_name = row['table_name'] - - if table_name not in schema: - schema[table_name] = {'name': table_name, 'columns': []} + build_schema(results, schema) - schema[table_name]['columns'].append(row['column_name']) def _get_tables(self, schema): ''' diff --git a/tests/query_runner/test_pg.py b/tests/query_runner/test_pg.py new file mode 100644 index 0000000000..6b244cfede --- /dev/null +++ b/tests/query_runner/test_pg.py @@ -0,0 +1,23 @@ + +from unittest import TestCase +from redash.query_runner.pg import build_schema + + +class TestBuildSchema(TestCase): + def test_handles_dups_between_public_and_other_schemas(self): + results = { + 'rows': [ + {'table_schema': 'public', 'table_name': 'main.users', 'column_name': 'id'}, + {'table_schema': 'main', 'table_name': 'users', 'column_name': 'id'}, + {'table_schema': 'main', 'table_name': 'users', 'column_name': 'name'}, + ] + } + + schema = {} + + build_schema(results, schema) + + self.assertIn('main.users', schema.keys()) + self.assertListEqual(schema['main.users']['columns'], ['id', 'name']) + self.assertIn('public."main.users"', schema.keys()) + self.assertListEqual(schema['public."main.users"']['columns'], ['id']) \ No newline at end of file From 851bbf36699ff6af69deb7c3393138a7408be459 Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Mon, 7 Oct 2019 11:44:33 +0300 Subject: [PATCH 2/2] PEP8 updates --- redash/query_runner/pg.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index e350e38bcc..f6060d431d 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -63,24 +63,29 @@ def _wait(conn, timeout=None): raise psycopg2.OperationalError("select.error received") +def full_table_name(schema, name): + if '.' in name: + name = u'"{}"'.format(name) + + return u'{}.{}'.format(schema, name) + + def build_schema(query_result, schema): - # By default we omit the public schema name from the table name. But there are + # By default we omit the public schema name from the table name. But there are # edge cases, where this might cause conflicts. For example: # * We have a schema named "main" with table "users". - # * We have a table named "main.users" in the public schema. + # * We have a table named "main.users" in the public schema. # (while this feels unlikely, this actually happened) # In this case if we omit the schema name for the public table, we will have # a conflict. - - full_table_name = lambda r: u'{}.{}'.format(r['table_schema'], r['table_name']) - table_names = set(map(full_table_name, query_result['rows'])) + table_names = set(map(lambda r: full_table_name(r['table_schema'], r['table_name']), query_result['rows'])) for row in query_result['rows']: if row['table_schema'] != 'public': - table_name = full_table_name(row) + table_name = full_table_name(row['table_schema'], row['table_name']) else: if row['table_name'] in table_names: - table_name = u'{}."{}"'.format(row['table_schema'], row['table_name']) + table_name = full_table_name(row['table_schema'], row['table_name']) else: table_name = row['table_name'] @@ -141,7 +146,6 @@ def _get_definitions(self, schema, query): build_schema(results, schema) - def _get_tables(self, schema): ''' relkind constants per https://www.postgresql.org/docs/10/static/catalog-pg-class.html