Skip to content
This repository has been archived by the owner on Jul 9, 2022. It is now read-only.

Commit

Permalink
Use system.jdbc for presto schema and add table filter
Browse files Browse the repository at this point in the history
In older versions of Presto, the old query sometimes throws an
"outputFormat should not be accessed from a null StorageFormat"
error (see prestodb/presto/issues/6972).

Sometimes there can be a very large number of tables, so it would be nice
to filter for only certain sources.

We might also add access control based on the table filter.
  • Loading branch information
ktmud committed Jun 26, 2019
1 parent 59b2bad commit b2c4cbe
Showing 1 changed file with 17 additions and 7 deletions.
24 changes: 17 additions & 7 deletions redash/query_runner/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,13 @@ def configuration_schema(cls):
'port': {
'type': 'number'
},
'schema': {
'default_schema': {
'type': 'string'
},
'table_filter': {
'type': 'string',
'default': 'RegExp to filter schema.tables'
},
'catalog': {
'type': 'string'
},
Expand All @@ -60,7 +64,8 @@ def configuration_schema(cls):
'type': 'string'
},
},
'order': ['host', 'protocol', 'port', 'username', 'password', 'schema', 'catalog'],
'order': ['host', 'protocol', 'port', 'username', 'password',
'default_schema', 'table_filter', 'catalog'],
'required': ['host']
}

Expand All @@ -75,10 +80,15 @@ def type(cls):
def get_schema(self, get_stats=False):
schema = {}
query = """
SELECT table_schema, table_name, column_name
FROM information_schema.columns
WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
"""
SELECT
table_schem, table_name, column_name
FROM system.jdbc.columns
WHERE table_cat = '{catalog}'
AND regexp_like(concat(table_schem, '.', table_name), '{table_filter}')
""".format(
catalog=self.configuration.get('catalog', 'hive'),
table_filter=self.configuration.get('table_filter', ''),
)

results, error = self.run_query(query, None)

Expand All @@ -88,7 +98,7 @@ def get_schema(self, get_stats=False):
results = json_loads(results)

for row in results['rows']:
table_name = '{}.{}'.format(row['table_schema'], row['table_name'])
table_name = '{}.{}'.format(row['table_schem'], row['table_name'])

if table_name not in schema:
schema[table_name] = {'name': table_name, 'columns': []}
Expand Down

0 comments on commit b2c4cbe

Please sign in to comment.