Use system.jdbc for presto schema and add table filter

In older versions of Presto, the old query sometimes throws an "outputFormat should not be accessed from a null StorageFormat" error (see prestodb/presto/issues/6972). In addition, there can be a very large number of tables, so it would be nice to filter for only certain sources. We might also add access control based on the table filter.
ktmud · Jun 26, 2019 · b2c4cbe · b2c4cbe
1 parent 59b2bad
commit b2c4cbe
Showing 1 changed file with 17 additions and 7 deletions.
diff --git a/redash/query_runner/presto.py b/redash/query_runner/presto.py
@@ -47,9 +47,13 @@ def configuration_schema(cls):
                 'port': {
                     'type': 'number'
                 },
-                'schema': {
+                'default_schema': {
                     'type': 'string'
                 },
+                'table_filter': {
+                    'type': 'string',
+                    'default': 'RegExp to filter schema.tables'
+                },
                 'catalog': {
                     'type': 'string'
                 },
@@ -60,7 +64,8 @@ def configuration_schema(cls):
                     'type': 'string'
                 },
             },
-            'order': ['host', 'protocol', 'port', 'username', 'password', 'schema', 'catalog'],
+            'order': ['host', 'protocol', 'port', 'username', 'password',
+                      'default_schema', 'table_filter', 'catalog'],
             'required': ['host']
         }
 
@@ -75,10 +80,15 @@ def type(cls):
     def get_schema(self, get_stats=False):
         schema = {}
         query = """
-        SELECT table_schema, table_name, column_name
-        FROM information_schema.columns
-        WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
-        """
+        SELECT
+            table_schem, table_name, column_name
+        FROM system.jdbc.columns
+        WHERE table_cat = '{catalog}'
+            AND regexp_like(concat(table_schem, '.', table_name), '{table_filter}')
+        """.format(
+            catalog=self.configuration.get('catalog', 'hive'),
+            table_filter=self.configuration.get('table_filter', ''),
+        )
 
         results, error = self.run_query(query, None)
 
@@ -88,7 +98,7 @@ def get_schema(self, get_stats=False):
         results = json_loads(results)
 
         for row in results['rows']:
-            table_name = '{}.{}'.format(row['table_schema'], row['table_name'])
+            table_name = '{}.{}'.format(row['table_schem'], row['table_name'])
 
             if table_name not in schema:
                 schema[table_name] = {'name': table_name, 'columns': []}