forked from amundsen-io/amundsendatabuilder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tableau_dashboard_query_extractor.py
120 lines (101 loc) · 4.83 KB
/
tableau_dashboard_query_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
import logging
from typing import (
Any, Dict, Iterator,
)
from pyhocon import ConfigFactory, ConfigTree
import databuilder.extractor.dashboard.tableau.tableau_dashboard_constants as const
from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.dashboard.tableau.tableau_dashboard_utils import (
TableauDashboardUtils, TableauGraphQLApiExtractor,
)
from databuilder.extractor.restapi.rest_api_extractor import STATIC_RECORD_DICT
from databuilder.transformer.base_transformer import ChainedTransformer
from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel
# Module-level logger, named after this module via __name__.
LOGGER = logging.getLogger(__name__)
class TableauGraphQLApiQueryExtractor(TableauGraphQLApiExtractor):
    """
    Implements the extraction-time logic for parsing the GraphQL result and transforming into a dict
    that fills the DashboardQuery model. Allows workbooks to be excluded based on their project.
    """

    # Config keys re-exported from tableau_dashboard_constants; both are read from self._conf in execute().
    CLUSTER = const.CLUSTER
    EXCLUDED_PROJECTS = const.EXCLUDED_PROJECTS

    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Runs the GraphQL query and yields one dict per (custom SQL table, downstream workbook) pair,
        skipping every workbook whose project name is listed under EXCLUDED_PROJECTS.

        :return: An iterator of dicts with the keys dashboard_group_id, dashboard_id, query_name,
            query_id, query_text and cluster.
        """
        response = self.execute_query()

        # The exclusion list does not change while iterating, so it is read from the config once
        # per call instead of once per workbook. Defaults to an empty list when the key is not set.
        excluded_projects = self._conf.get_list(TableauGraphQLApiQueryExtractor.EXCLUDED_PROJECTS, [])

        for query in response['customSQLTables']:
            for workbook in query['downstreamWorkbooks']:
                if workbook['projectName'] in excluded_projects:
                    continue

                yield {
                    'dashboard_group_id': workbook['projectName'],
                    'dashboard_id': TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                    'query_name': query['name'],
                    'query_id': query['id'],
                    'query_text': query['query'],
                    # Looked up only when a record is actually emitted (no default is supplied for
                    # this key), which keeps the point of failure for a missing setting unchanged.
                    'cluster': self._conf.get_string(TableauGraphQLApiQueryExtractor.CLUSTER),
                }
class TableauDashboardQueryExtractor(Extractor):
    """
    Extracts metadata about the queries associated with Tableau workbooks.

    Tableau's Metadata API calls these queries "custom SQL tables". Not every workbook has one:
    most are built from a mixture of datasource fields used directly and various "calculated" columns.

    The extractor walks the queries one at a time and yields a new relationship for every downstream
    workbook that uses the query.
    """

    # Constants re-exported from tableau_dashboard_constants so callers can reference them through this
    # class (presumably the config keys for this extractor; confirm against that module).
    API_BASE_URL = const.API_BASE_URL
    API_VERSION = const.API_VERSION
    CLUSTER = const.CLUSTER
    EXCLUDED_PROJECTS = const.EXCLUDED_PROJECTS
    SITE_NAME = const.SITE_NAME
    TABLEAU_ACCESS_TOKEN_NAME = const.TABLEAU_ACCESS_TOKEN_NAME
    TABLEAU_ACCESS_TOKEN_SECRET = const.TABLEAU_ACCESS_TOKEN_SECRET
    VERIFY_REQUEST = const.VERIFY_REQUEST

    def init(self, conf: ConfigTree) -> None:
        """
        Stores the configuration, builds the underlying GraphQL extractor and prepares the transformer
        chain that turns each extracted dict into a DashboardQuery model.

        :param conf: The configuration this extractor and its inner extractor/transformer read from.
        """
        self._conf = conf

        # The literal is written flush-left; GraphQL treats whitespace between tokens as insignificant.
        self.query = """query {
customSQLTables {
id
name
query
downstreamWorkbooks {
name
projectName
}
}
}"""
        self._extractor = self._build_extractor()

        # A single DictToModel step, targeting DashboardQuery unless the scoped conf says otherwise.
        model_transformer = DictToModel()
        scoped_transformer_conf = Scoped.get_scoped_conf(self._conf, model_transformer.get_scope())
        model_class_fallback = ConfigFactory.from_dict(
            {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_query.DashboardQuery'}
        )
        model_transformer.init(conf=scoped_transformer_conf.with_fallback(model_class_fallback))

        self._transformer = ChainedTransformer(transformers=[model_transformer])

    def extract(self) -> Any:
        """
        Pulls the next raw record from the inner extractor and runs it through the transformer chain.

        :return: The first item produced by the transformer for the record, or None when the inner
            extractor returns a falsy record or the transformer produces nothing.
        """
        raw_record = self._extractor.extract()
        if raw_record:
            return next(self._transformer.transform(record=raw_record), None)
        return None

    def get_scope(self) -> str:
        """
        :return: The configuration scope of this extractor.
        """
        return 'extractor.tableau_dashboard_query'

    def _build_extractor(self) -> TableauGraphQLApiQueryExtractor:
        """
        Builds a TableauGraphQLApiQueryExtractor. All data required can be retrieved with a single GraphQL call.

        The inner extractor's conf is its own scoped conf, falling back first to this extractor's conf
        and then to the GraphQL query text plus a static record dict.

        :return: A TableauGraphQLApiQueryExtractor that provides dashboard query metadata.
        """
        graphql_extractor = TableauGraphQLApiQueryExtractor()

        scoped_conf = Scoped.get_scoped_conf(self._conf, graphql_extractor.get_scope())
        conf_with_parent = scoped_conf.with_fallback(self._conf)
        builtin_defaults = ConfigFactory.from_dict({
            TableauGraphQLApiExtractor.QUERY: self.query,
            STATIC_RECORD_DICT: {'product': 'tableau'},
        })

        graphql_extractor.init(conf=conf_with_parent.with_fallback(builtin_defaults))
        return graphql_extractor