-
Notifications
You must be signed in to change notification settings - Fork 209
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add RedashDashboardExtractor for extracting dashboards from redash.io (#300)
* Add RedashDashboardExtractor for extracting dashboards from redash.io
* Fixed broken tests in python2
* Added supported Redash version and required Redash API endpoints to README
- Loading branch information
Showing
10 changed files
with
774 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
243 changes: 243 additions & 0 deletions
243
databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,243 @@ | ||
import importlib | ||
from pyhocon import ConfigFactory | ||
from databuilder.models.dashboard.dashboard_metadata import DashboardMetadata | ||
from databuilder.models.dashboard.dashboard_last_modified import DashboardLastModifiedTimestamp | ||
from databuilder.models.dashboard.dashboard_owner import DashboardOwner | ||
from databuilder.models.dashboard.dashboard_query import DashboardQuery | ||
from databuilder.models.dashboard.dashboard_table import DashboardTable | ||
from databuilder.models.table_metadata import TableMetadata | ||
from databuilder.extractor.base_extractor import Extractor | ||
from databuilder.rest_api.rest_api_query import RestApiQuery | ||
from databuilder.rest_api.base_rest_api_query import EmptyRestApiQuerySeed | ||
from databuilder.extractor.restapi.rest_api_extractor import RestAPIExtractor, REST_API_QUERY | ||
from databuilder.extractor.dashboard.redash.redash_dashboard_utils import \ | ||
get_auth_headers, get_text_widgets, get_visualization_widgets, sort_widgets, \ | ||
generate_dashboard_description, RedashPaginatedRestApiQuery | ||
from databuilder.transformer.base_transformer import ChainedTransformer | ||
from databuilder.transformer.timestamp_string_to_epoch import TimestampStringToEpoch, FIELD_NAME as TS_FIELD_NAME | ||
|
||
|
||
class TableRelationData:
    """
    A minimal counterpart to `TableMetadata` that holds only the four
    identifiers of a table: database, cluster, schema and table name.
    The (optional) table parser returns instances of this type.
    """

    def __init__(self, database, cluster, schema, name):
        # type: (str, str, str, str) -> None

        # Stored as a dict so it can be splatted straight into
        # `TableMetadata.TABLE_KEY_FORMAT.format(...)` in `key`.
        self._data = dict(db=database, cluster=cluster, schema=schema, tbl=name)

    @property
    def key(self):
        # type: () -> str
        """Return the table key built from `TableMetadata.TABLE_KEY_FORMAT`."""

        key_template = TableMetadata.TABLE_KEY_FORMAT
        return key_template.format(**self._data)
|
||
|
||
class RedashDashboardExtractor(Extractor):
    """
    An extractor for retrieving dashboards and associated queries
    (and possibly tables) from Redash.

    There are five configuration values:

    - `redash_base_url`: (e.g., `https://redash.example.com`) Base URL for the user-facing
    Redash application
    - `api_base_url`: (e.g., `https://redash.example.com/api`) Base URL for the API
    - `api_key`: Redash API key
    - (optional) `cluster`: A cluster name for this Redash instance (defaults to `prod`)
    - (optional) `table_parser`: Dotted import path (e.g., `my_package.my_module.parse_tables`)
    of a function `(RedashVisualizationWidget) -> List[TableRelationData]`.
    The path is split on its last `.`; the module part is imported and the function is
    looked up on it by name.
    Given a `RedashVisualizationWidget`, this should return a list of potentially related tables
    in Amundsen. Any table returned that exists in Amundsen will be linked to the dashboard.
    Any table that does not exist will be ignored.
    """

    REDASH_BASE_URL_KEY = 'redash_base_url'
    API_BASE_URL_KEY = 'api_base_url'
    API_KEY_KEY = 'api_key'
    CLUSTER_KEY = 'cluster'  # optional config
    TABLE_PARSER_KEY = 'table_parser'  # optional config

    DEFAULT_CLUSTER = 'prod'

    PRODUCT = 'redash'
    DASHBOARD_GROUP_ID = 'redash'
    DASHBOARD_GROUP_NAME = 'Redash'

    def init(self, conf):
        # type: (ConfigTree) -> None
        """
        Read the configuration, resolve the optional table parser and build
        the wrapped REST API extractor and the timestamp transformer.

        :param conf: configuration holding the keys listed in the class docstring
        :raises ImportError: if the module part of `table_parser` cannot be imported
        :raises AttributeError: if the function named by `table_parser` is not in that module
        """

        # required configuration
        self._redash_base_url = conf.get_string(RedashDashboardExtractor.REDASH_BASE_URL_KEY)
        self._api_base_url = conf.get_string(RedashDashboardExtractor.API_BASE_URL_KEY)
        self._api_key = conf.get_string(RedashDashboardExtractor.API_KEY_KEY)

        # optional configuration
        self._cluster = conf.get_string(
            RedashDashboardExtractor.CLUSTER_KEY, RedashDashboardExtractor.DEFAULT_CLUSTER
        )

        self._parse_tables = None
        # An explicit default is required here: without one, a missing key raises
        # instead of returning a falsy value, which would make this "optional"
        # setting mandatory.
        tbl_parser_path = conf.get_string(RedashDashboardExtractor.TABLE_PARSER_KEY, None)
        if tbl_parser_path:
            module_name, fn_name = tbl_parser_path.rsplit('.', 1)
            mod = importlib.import_module(module_name)
            self._parse_tables = getattr(mod, fn_name)

        self._extractor = self._build_extractor()
        self._transformer = self._build_transformer()
        # Created lazily on the first call to `extract`.
        self._extract_iter = None

    def _is_published_dashboard(self, record):
        # type: (Dict[str, Any]) -> bool
        """Return True when the record is neither archived nor a draft."""

        return not (record['is_archived'] or record['is_draft'])

    def _get_extract_iter(self):
        # type: () -> Iterator[Any]
        """
        Generate the model objects for every published dashboard.

        For each dashboard record pulled from the wrapped extractor this yields,
        in order: one `DashboardMetadata`, one `DashboardLastModifiedTimestamp`,
        one `DashboardOwner`, one `DashboardQuery` per visualization widget and,
        if the table parser produced any table, one `DashboardTable`.
        """

        while True:
            record = self._extractor.extract()
            if not record:
                break  # the end.

            record = self._transformer.transform(record=record)

            if not self._is_published_dashboard(record):
                continue  # filter this one out

            # Fields shared by every model object emitted for this dashboard.
            identity_data = {
                'cluster': self._cluster,
                'product': RedashDashboardExtractor.PRODUCT,
                'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                'dashboard_id': record['dashboard_id']
            }

            dash_data = {
                'dashboard_group':
                    RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
                'dashboard_group_url':
                    self._redash_base_url,
                'dashboard_name':
                    record['dashboard_name'],
                'dashboard_url':
                    '{redash}/dashboard/{slug}'
                    .format(redash=self._redash_base_url, slug=record['slug']),
                'created_timestamp':
                    record['created_timestamp']
            }
            dash_data.update(identity_data)

            widgets = sort_widgets(record['widgets'])
            text_widgets = get_text_widgets(widgets)
            viz_widgets = get_visualization_widgets(widgets)

            # generate a description for this dashboard, since Redash does not have descriptions
            dash_data['description'] = generate_dashboard_description(text_widgets, viz_widgets)

            yield DashboardMetadata(**dash_data)

            last_mod_data = {'last_modified_timestamp': record['last_modified_timestamp']}
            last_mod_data.update(identity_data)

            yield DashboardLastModifiedTimestamp(**last_mod_data)

            owner_data = {'email': record['user']['email']}
            owner_data.update(identity_data)

            yield DashboardOwner(**owner_data)

            # A set de-duplicates tables referenced by several widgets.
            table_keys = set()

            for viz in viz_widgets:
                query_data = {
                    'query_id': viz.query_id,
                    'query_name': viz.query_name,
                    'url': self._redash_base_url + viz.query_relative_url,
                    'query_text': viz.raw_query
                }

                query_data.update(identity_data)
                yield DashboardQuery(**query_data)

                # if a table parser is provided, retrieve tables from this viz
                if self._parse_tables:
                    for tbl in self._parse_tables(viz):
                        table_keys.add(tbl.key)

            if table_keys:
                # Sorted so the emitted list does not depend on set iteration order.
                yield DashboardTable(table_ids=sorted(table_keys), **identity_data)

    def extract(self):
        # type: () -> Any
        """Return the next extracted model object, or None once exhausted."""

        if self._extract_iter is None:
            self._extract_iter = self._get_extract_iter()
        try:
            return next(self._extract_iter)
        except StopIteration:
            return None

    def _build_restapi_query(self):
        # type: () -> RestApiQuery
        """
        Build the two-stage REST query: the paginated dashboard listing,
        joined with a per-dashboard request that fetches its widgets.
        """

        dashes_query = RedashPaginatedRestApiQuery(
            query_to_join=EmptyRestApiQuerySeed(),
            url='{redash_api}/dashboards'.format(redash_api=self._api_base_url),
            params=self._get_default_api_query_params(),
            json_path='results[*].[id,name,slug,created_at,updated_at,is_archived,is_draft,user]',
            field_names=[
                'dashboard_id', 'dashboard_name', 'slug', 'created_timestamp',
                'last_modified_timestamp', 'is_archived', 'is_draft', 'user'
            ],
            skip_no_result=True
        )

        # The doubled braces survive `.format` as a literal `{slug}` placeholder.
        # NOTE(review): presumably RestApiQuery fills it from the `slug` field of
        # each joined dashboard record -- confirm against RestApiQuery.
        return RestApiQuery(
            query_to_join=dashes_query,
            url='{redash_api}/dashboards/{{slug}}'.format(redash_api=self._api_base_url),
            params=self._get_default_api_query_params(),
            json_path='widgets',
            field_names=['widgets'],
            skip_no_result=True
        )

    def _get_default_api_query_params(self):
        # type: () -> Dict[str, Any]
        """Return the request params carrying the auth headers for the API key."""

        return {'headers': get_auth_headers(self._api_key)}

    def _build_extractor(self):
        # type: () -> RestAPIExtractor
        """Create and initialise a `RestAPIExtractor` around the REST query."""

        extractor = RestAPIExtractor()
        rest_api_extractor_conf = ConfigFactory.from_dict({
            REST_API_QUERY: self._build_restapi_query()
        })
        extractor.init(rest_api_extractor_conf)
        return extractor

    def _build_transformer(self):
        # type: () -> ChainedTransformer
        """Build the transformer chain that converts both timestamp fields."""

        transformers = []

        # transform timestamps from ISO to unix epoch
        for field_name in ('created_timestamp', 'last_modified_timestamp'):
            ts_transformer = TimestampStringToEpoch()
            ts_transformer.init(ConfigFactory.from_dict({
                TS_FIELD_NAME: field_name,
            }))
            transformers.append(ts_transformer)

        return ChainedTransformer(transformers=transformers)

    def get_scope(self):
        # type: () -> str
        """Return the configuration scope of this extractor."""

        return 'extractor.redash_dashboard'
Oops, something went wrong.