From b2c4cbe245969ab5e7c47d93e44fa364cf488408 Mon Sep 17 00:00:00 2001
From: Jesse Yang <jyyjcc@gmail.com>
Date: Wed, 19 Jun 2019 23:22:16 -0700
Subject: [PATCH] Use system.jdbc for presto schema and add table filter

In older versions of Presto, the previous information_schema.columns query
sometimes fails with an "outputFormat should not be accessed from a null
StorageFormat" error (see prestodb/presto/issues/6972), so the schema is
now read from system.jdbc.columns instead.

A catalog can also contain a very large number of tables, so add a
`table_filter` regular-expression setting that restricts the schema
listing to matching `schema.table` names.

In the future, the table filter could also serve as a basis for access control.
---
 redash/query_runner/presto.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/redash/query_runner/presto.py b/redash/query_runner/presto.py
index 2966d1ccf9..f55202d037 100644
--- a/redash/query_runner/presto.py
+++ b/redash/query_runner/presto.py
@@ -47,9 +47,13 @@ def configuration_schema(cls):
                 'port': {
                     'type': 'number'
                 },
-                'schema': {
+                'default_schema': {
                     'type': 'string'
                 },
+                'table_filter': {
+                    'type': 'string',
+                    'default': 'RegExp to filter schema.tables'
+                },
                 'catalog': {
                     'type': 'string'
                 },
@@ -60,7 +64,8 @@ def configuration_schema(cls):
                     'type': 'string'
                 },
             },
-            'order': ['host', 'protocol', 'port', 'username', 'password', 'schema', 'catalog'],
+            'order': ['host', 'protocol', 'port', 'username', 'password',
+                      'default_schema', 'table_filter', 'catalog'],
             'required': ['host']
         }
 
@@ -75,10 +80,15 @@ def type(cls):
     def get_schema(self, get_stats=False):
         schema = {}
         query = """
-        SELECT table_schema, table_name, column_name
-        FROM information_schema.columns
-        WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
-        """
+        SELECT
+            table_schem, table_name, column_name
+        FROM system.jdbc.columns
+        WHERE table_cat = '{catalog}'
+            AND regexp_like(concat(table_schem, '.', table_name), '{table_filter}')
+        """.format(
+            catalog=self.configuration.get('catalog', 'hive'),
+            table_filter=self.configuration.get('table_filter', ''),
+        )
 
         results, error = self.run_query(query, None)
 
@@ -88,7 +98,7 @@ def get_schema(self, get_stats=False):
         results = json_loads(results)
 
         for row in results['rows']:
-            table_name = '{}.{}'.format(row['table_schema'], row['table_name'])
+            table_name = '{}.{}'.format(row['table_schem'], row['table_name'])
 
             if table_name not in schema:
                 schema[table_name] = {'name': table_name, 'columns': []}