diff --git a/README.md b/README.md index a2934e415..d8fbe0611 100644 --- a/README.md +++ b/README.md @@ -616,6 +616,47 @@ Note that this provides accumulated view count which does [not effectively show If you are fine with `accumulated usage`, you could use TemplateVariableSubstitutionTransformer to transform Dict payload from [ModeDashboardUsageExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py) to fit [DashboardUsage](./docs/models.md#dashboardusage) and transform Dict to [DashboardUsage](./docs/models.md#dashboardusage) by [TemplateVariableSubstitutionTransformer](./databuilder/transformer/template_variable_substitution_transformer.py), and [DictToModel](./databuilder/transformer/dict_to_model.py) transformers. ([Example](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py#L36) on how to combining these two transformers) +### [RedashDashboardExtractor](./databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py) + +The included `RedashDashboardExtractor` provides support for extracting basic metadata for Redash dashboards (dashboard name, owner, URL, created/updated timestamps, and a generated description) and their associated queries (query name, URL, and raw query). It can be extended with a configurable table parser function to also support extraction of `DashboardTable` metadata. (See below for example usage.) + +Note: `DashboardUsage` and `DashboardExecution` metadata are not supported in this extractor, as these concepts are not supported by the Redash API. + +The `RedashDashboardExtractor` depends on the following Redash API endpoints: `GET /api/dashboards`, `GET /api/dashboards/{slug}`. It has been tested against Redash 8 and is also expected to work with Redash 9. 
+ +```python +extractor = RedashDashboardExtractor() +task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader()) + +job_config = ConfigFactory.from_dict({ + 'extractor.redash_dashboard.redash_base_url': redash_base_url, # ex: https://redash.example.org + 'extractor.redash_dashboard.api_base_url': api_base_url, # ex: https://redash.example.org/api + 'extractor.redash_dashboard.api_key': api_key, # ex: abc1234 + 'extractor.redash_dashboard.table_parser': table_parser # ex: my_library.module.parse_tables +}) + +job = DefaultJob(conf=job_config, + task=task, + publisher=Neo4jCsvPublisher()) +job.launch() +``` + +#### RedashDashboardExtractor: table_parser + +The `RedashDashboardExtractor` extracts raw queries from each dashboard. You may optionally use these queries to parse out relations to tables in Amundsen. A table parser can be provided in the configuration for the `RedashDashboardExtractor`, as seen above. This function should have type signature `(RedashVisualizationWidget) -> Iterator[TableRelationData]`. For example: + +```python +def parse_tables(viz_widget): + # type: (RedashVisualizationWidget) -> Iterator[TableRelationData] + # Each viz_widget corresponds to one query. + # viz_widget.data_source_id is the ID of the target DB in Redash. + # viz_widget.raw_query is the raw query (e.g., SQL). 
+ if viz_widget.data_source_id == 123: + table_names = some_sql_parser(viz_widget.raw_query) + return [TableRelationData('some_db', 'prod', 'some_schema', tbl) for tbl in table_names] + return [] +``` + ## List of transformers #### [ChainedTransformer](https://github.com/lyft/amundsendatabuilder/blob/master/databuilder/transformer/base_transformer.py#L41 "ChainedTransformer") diff --git a/databuilder/extractor/dashboard/redash/__init__.py b/databuilder/extractor/dashboard/redash/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py b/databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py new file mode 100644 index 000000000..150d4b383 --- /dev/null +++ b/databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py @@ -0,0 +1,243 @@ +import importlib +from pyhocon import ConfigFactory +from databuilder.models.dashboard.dashboard_metadata import DashboardMetadata +from databuilder.models.dashboard.dashboard_last_modified import DashboardLastModifiedTimestamp +from databuilder.models.dashboard.dashboard_owner import DashboardOwner +from databuilder.models.dashboard.dashboard_query import DashboardQuery +from databuilder.models.dashboard.dashboard_table import DashboardTable +from databuilder.models.table_metadata import TableMetadata +from databuilder.extractor.base_extractor import Extractor +from databuilder.rest_api.rest_api_query import RestApiQuery +from databuilder.rest_api.base_rest_api_query import EmptyRestApiQuerySeed +from databuilder.extractor.restapi.rest_api_extractor import RestAPIExtractor, REST_API_QUERY +from databuilder.extractor.dashboard.redash.redash_dashboard_utils import \ + get_auth_headers, get_text_widgets, get_visualization_widgets, sort_widgets, \ + generate_dashboard_description, RedashPaginatedRestApiQuery +from databuilder.transformer.base_transformer import ChainedTransformer +from 
from databuilder.transformer.timestamp_string_to_epoch import TimestampStringToEpoch, FIELD_NAME as TS_FIELD_NAME


class TableRelationData:
    """
    This is sort of like a stripped down version of `TableMetadata`.
    It is used as the type returned by the (optional) table parser.
    """

    def __init__(self, database, cluster, schema, name):
        # type: (str, str, str, str) -> None

        # These entries are unpacked into TableMetadata.TABLE_KEY_FORMAT by `key`.
        self._data = {'db': database, 'cluster': cluster, 'schema': schema, 'tbl': name}

    @property
    def key(self):
        # type: () -> str
        """
        The table key, produced by formatting `TableMetadata.TABLE_KEY_FORMAT`
        with this table's database, cluster, schema and name.
        """

        return TableMetadata.TABLE_KEY_FORMAT.format(**self._data)


class RedashDashboardExtractor(Extractor):
    """
    An extractor for retrieving dashboards and associated queries
    (and possibly tables) from Redash.

    There are five configuration values:

    - `redash_base_url`: (e.g., `https://redash.example.com`) Base URL for the user-facing
    Redash application
    - `api_base_url`: (e.g., `https://redash.example.com/api`) Base URL for the API
    - `api_key`: Redash API key
    - (optional) `cluster`: A cluster name for this Redash instance (defaults to `prod`)
    - (optional) `table_parser`: Dotted import path (e.g., `my_library.module.parse_tables`)
    of a function `(RedashVisualizationWidget) -> List[TableRelationData]`.
    Given a `RedashVisualizationWidget`, this should return a list of potentially related tables
    in Amundsen. Any table returned that exists in Amundsen will be linked to the dashboard.
    Any table that does not exist will be ignored.
    """

    REDASH_BASE_URL_KEY = 'redash_base_url'
    API_BASE_URL_KEY = 'api_base_url'
    API_KEY_KEY = 'api_key'
    CLUSTER_KEY = 'cluster'  # optional config
    TABLE_PARSER_KEY = 'table_parser'  # optional config

    DEFAULT_CLUSTER = 'prod'

    PRODUCT = 'redash'
    DASHBOARD_GROUP_ID = 'redash'
    DASHBOARD_GROUP_NAME = 'Redash'

    def init(self, conf):
        # type: (ConfigTree) -> None
        """
        Read the configuration, resolve the optional table parser and build
        the underlying REST API extractor and timestamp transformer.

        :param conf: configuration scoped to `get_scope()`
        """

        # required configuration
        self._redash_base_url = conf.get_string(RedashDashboardExtractor.REDASH_BASE_URL_KEY)
        self._api_base_url = conf.get_string(RedashDashboardExtractor.API_BASE_URL_KEY)
        self._api_key = conf.get_string(RedashDashboardExtractor.API_KEY_KEY)

        # optional configuration
        self._cluster = conf.get_string(
            RedashDashboardExtractor.CLUSTER_KEY, RedashDashboardExtractor.DEFAULT_CLUSTER
        )

        # `table_parser` is optional, so a default must be supplied here;
        # without one, a configuration that omits the key fails on lookup.
        self._parse_tables = None
        tbl_parser_path = conf.get_string(RedashDashboardExtractor.TABLE_PARSER_KEY, None)
        if tbl_parser_path:
            # 'package.module.function' -> import the module, then fetch the function
            module_name, fn_name = tbl_parser_path.rsplit('.', 1)
            mod = importlib.import_module(module_name)
            self._parse_tables = getattr(mod, fn_name)

        self._extractor = self._build_extractor()
        self._transformer = self._build_transformer()
        self._extract_iter = None

    def _is_published_dashboard(self, record):
        # type: (Dict[str, Any]) -> bool
        """Return True unless the dashboard record is archived or still a draft."""

        return not (record['is_archived'] or record['is_draft'])

    def _get_extract_iter(self):
        # type: () -> Iterator[Any]
        """
        Generate the records for every published dashboard, in this order:
        `DashboardMetadata`, `DashboardLastModifiedTimestamp`, `DashboardOwner`,
        one `DashboardQuery` per visualization widget and, when the table parser
        returned at least one table, a single `DashboardTable`.
        """

        while True:
            record = self._extractor.extract()
            if not record:
                break  # the end.

            record = self._transformer.transform(record=record)

            if not self._is_published_dashboard(record):
                continue  # filter this one out

            # fields shared by every record emitted for this dashboard
            identity_data = {
                'cluster': self._cluster,
                'product': RedashDashboardExtractor.PRODUCT,
                'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                'dashboard_id': record['dashboard_id']
            }

            dash_data = {
                'dashboard_group':
                    RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
                'dashboard_group_url':
                    self._redash_base_url,
                'dashboard_name':
                    record['dashboard_name'],
                'dashboard_url':
                    '{redash}/dashboard/{slug}'
                    .format(redash=self._redash_base_url, slug=record['slug']),
                'created_timestamp':
                    record['created_timestamp']
            }
            dash_data.update(identity_data)

            widgets = sort_widgets(record['widgets'])
            text_widgets = get_text_widgets(widgets)
            viz_widgets = get_visualization_widgets(widgets)

            # generate a description for this dashboard, since Redash does not have descriptions
            dash_data['description'] = generate_dashboard_description(text_widgets, viz_widgets)

            yield DashboardMetadata(**dash_data)

            last_mod_data = {'last_modified_timestamp': record['last_modified_timestamp']}
            last_mod_data.update(identity_data)

            yield DashboardLastModifiedTimestamp(**last_mod_data)

            owner_data = {'email': record['user']['email']}
            owner_data.update(identity_data)

            yield DashboardOwner(**owner_data)

            table_keys = set()

            for viz in viz_widgets:
                query_data = {
                    'query_id': viz.query_id,
                    'query_name': viz.query_name,
                    'url': self._redash_base_url + viz.query_relative_url,
                    'query_text': viz.raw_query
                }

                query_data.update(identity_data)
                yield DashboardQuery(**query_data)

                # if a table parser is provided, retrieve tables from this viz
                if self._parse_tables:
                    for tbl in self._parse_tables(viz):
                        table_keys.add(tbl.key)

            if table_keys:
                # sorted so that the emitted table ids have a stable order across runs
                yield DashboardTable(table_ids=sorted(table_keys), **identity_data)

    def extract(self):
        # type: () -> Any
        """Return the next record, or None once every dashboard has been processed."""

        if self._extract_iter is None:
            self._extract_iter = self._get_extract_iter()
        try:
            return next(self._extract_iter)
        except StopIteration:
            return None

    def _build_restapi_query(self):
        # type: () -> RestApiQuery
        """
        Build the two-step query: a paginated listing of dashboards, joined with
        a per-dashboard request (by slug) that supplies the dashboard's widgets.
        """

        dashes_query = RedashPaginatedRestApiQuery(
            query_to_join=EmptyRestApiQuerySeed(),
            url='{redash_api}/dashboards'.format(redash_api=self._api_base_url),
            params=self._get_default_api_query_params(),
            json_path='results[*].[id,name,slug,created_at,updated_at,is_archived,is_draft,user]',
            field_names=[
                'dashboard_id', 'dashboard_name', 'slug', 'created_timestamp',
                'last_modified_timestamp', 'is_archived', 'is_draft', 'user'
            ],
            skip_no_result=True
        )

        return RestApiQuery(
            query_to_join=dashes_query,
            # `{{slug}}` survives this format() call as the literal `{slug}` placeholder
            url='{redash_api}/dashboards/{{slug}}'.format(redash_api=self._api_base_url),
            params=self._get_default_api_query_params(),
            json_path='widgets',
            field_names=['widgets'],
            skip_no_result=True
        )

    def _get_default_api_query_params(self):
        # type: () -> Dict[str, Any]
        """Return a fresh params dict carrying the API-key authorization headers."""

        return {'headers': get_auth_headers(self._api_key)}

    def _build_extractor(self):
        # type: () -> RestAPIExtractor
        """Create and initialise the `RestAPIExtractor` that runs the Redash queries."""

        extractor = RestAPIExtractor()
        rest_api_extractor_conf = ConfigFactory.from_dict({
            REST_API_QUERY: self._build_restapi_query()
        })
        extractor.init(rest_api_extractor_conf)
        return extractor

    def _build_transformer(self):
        # type: () -> ChainedTransformer
        """Chain the transformers that convert both timestamp fields to unix epoch."""

        transformers = []

        # transform timestamps from ISO to unix epoch
        ts_transformer_1 = TimestampStringToEpoch()
        ts_transformer_1.init(ConfigFactory.from_dict({
            TS_FIELD_NAME: 'created_timestamp',
        }))
        transformers.append(ts_transformer_1)

        ts_transformer_2 = TimestampStringToEpoch()
        ts_transformer_2.init(ConfigFactory.from_dict({
            TS_FIELD_NAME: 'last_modified_timestamp',
        }))
        transformers.append(ts_transformer_2)

        return ChainedTransformer(transformers=transformers)

    def get_scope(self):
        # type: () -> str
        """Configuration scope under which this extractor's settings live."""

        return 'extractor.redash_dashboard'
def sort_widgets(widgets):
    # type: (Iterable[Dict[str, Any]]) -> List[Dict[str, Any]]
    """
    Sort raw widget data (as returned from the API) according to the position
    of the widgets in the dashboard (top to bottom, left to right)
    Redash does not return widgets in order of their position,
    so we do this to ensure that we look at widgets in a sensible order.

    Widgets without `options` or without an explicit position are treated
    as being at row 0, column 0.
    """

    def row_and_col(widget):
        # these entities usually but not always have explicit rows and cols
        options = widget.get('options') or {}
        pos = options.get('position') or {}
        return (pos.get('row', 0), pos.get('col', 0))

    return sorted(widgets, key=row_and_col)


def get_text_widgets(widgets):
    # type: (Iterable[Dict[str, Any]]) -> List[RedashTextWidget]
    """
    From the raw set of widget data returned from the API, filter down
    to text widgets and return them as a list of `RedashTextWidget`
    """

    return [RedashTextWidget(widget) for widget in widgets
            if 'text' in widget and 'visualization' not in widget]


def get_visualization_widgets(widgets):
    # type: (Iterable[Dict[str, Any]]) -> List[RedashVisualizationWidget]
    """
    From the raw set of widget data returned from the API, filter down
    to visualization widgets and return them as a list of `RedashVisualizationWidget`
    """

    return [RedashVisualizationWidget(widget) for widget in widgets
            if 'visualization' in widget]


def get_auth_headers(api_key):
    # type: (str) -> Dict[str, str]
    """Build the `Authorization` header dict for the given Redash API key."""
    return {'Authorization': 'Key {}'.format(api_key)}


def generate_dashboard_description(text_widgets, viz_widgets):
    # type: (List[RedashTextWidget], List[RedashVisualizationWidget]) -> str
    """
    Redash doesn't have dashboard descriptions, so we'll make our own.
    If there exist any text widgets, concatenate them,
    and use this text as the description for this dashboard.
    If not, put together a list of query names.
    If all else fails, this looks like an empty dashboard.

    Query names are listed once each, in the order the widgets are given,
    so the description is stable from run to run.
    """

    if text_widgets:
        return '\n\n'.join([w.text for w in text_widgets])

    if viz_widgets:
        # Several visualizations can share one query; list each name once while
        # keeping the incoming (position-sorted) order. A set of widget objects
        # would neither de-duplicate nor preserve that order.
        seen = set()
        query_names = []
        for viz in viz_widgets:
            name = viz.query_name
            if name not in seen:
                seen.add(name)
                query_names.append(name)

        query_list = '\n'.join(['- {}'.format(name) for name in query_names])
        return 'A dashboard containing the following queries:\n\n' + query_list

    return 'This dashboard appears to be empty!'


class RedashVisualizationWidget:
    """
    A visualization widget in a Redash dashboard.
    These are mapped 1:1 with queries, and can be of various types, e.g.:
    CHART, TABLE, PIVOT, etc.
    The query name acts like a title for the widget on the dashboard.
    """

    def __init__(self, data):
        # type: (Dict[str, Any]) -> None
        self._data = data

    @property
    def raw_query(self):
        # type: () -> str
        return self._data['visualization']['query']['query']

    @property
    def data_source_id(self):
        # type: () -> int
        return self._data['visualization']['query']['data_source_id']

    @property
    def query_id(self):
        # type: () -> int
        return self._data['visualization']['query']['id']

    @property
    def query_relative_url(self):
        # type: () -> str
        return '/queries/{id}'.format(id=self.query_id)

    @property
    def query_name(self):
        # type: () -> str
        return self._data['visualization']['query']['name']


class RedashTextWidget:
    """
    A textbox in a Redash dashboard.
    It pretty much just contains a single text property (Markdown).
    """

    def __init__(self, data):
        # type: (Dict[str, Any]) -> None
        self._data = data

    @property
    def text(self):
        # type: () -> str
        return self._data['text']
from databuilder.rest_api.rest_api_query import RestApiQuery


class RedashPaginatedRestApiQuery(RestApiQuery):
    """
    Paginated Redash API queries

    Tracks the current page in the request's query-string parameters and, after
    each response, either advances to the next page or flags that there are no
    more pages.
    """

    def __init__(self, **kwargs):
        # type: (...) -> None
        super(RedashPaginatedRestApiQuery, self).__init__(**kwargs)
        # Always start at the first page, creating the nested `params` entry if absent.
        self._params.setdefault('params', {})['page'] = 1

    def _total_records(self, res):
        # type: (Dict[str, Any]) -> int
        """Total number of records reported by the response payload."""
        return res['count']

    def _max_record_on_page(self, res):
        # type: (Dict[str, Any]) -> int
        """Highest record index that the reported page can contain."""
        return res['page_size'] * res['page']

    def _next_page(self, res):
        # type: (Dict[str, Any]) -> int
        """Page number that follows the one reported by the response payload."""
        return res['page'] + 1

    def _post_process(self, response):
        # type: (Any) -> None
        """
        Update the pagination state from a response: stop when the current page
        reaches the reported total, otherwise request the following page.
        """
        # NOTE(review): `_more_pages` is presumably read by RestApiQuery to decide
        # whether to issue another request -- confirm against the base class.
        parsed = response.json()

        if self._max_record_on_page(parsed) >= self._total_records(parsed):
            self._more_pages = False
        else:
            self._params['params']['page'] = self._next_page(parsed)
            self._more_pages = True


# --- databuilder/rest_api/base_rest_api_query.py ---


class EmptyRestApiQuerySeed(RestApiQuerySeed):
    """
    Sometimes there simply isn't a record to seed with.
    """

    def __init__(self):
        # type: () -> None

        # a single placeholder record, so the seed still yields exactly one item
        super(EmptyRestApiQuerySeed, self).__init__([{'empty_rest_api_query_seed': 1}])
import logging
import unittest

from mock import patch
from pyhocon import ConfigFactory  # noqa: F401

from databuilder import Scoped
from databuilder.extractor.dashboard.redash.redash_dashboard_extractor import \
    RedashDashboardExtractor, TableRelationData
from databuilder.models.dashboard.dashboard_last_modified import DashboardLastModifiedTimestamp
from databuilder.models.dashboard.dashboard_owner import DashboardOwner
from databuilder.models.dashboard.dashboard_query import DashboardQuery
from databuilder.models.dashboard.dashboard_table import DashboardTable


logging.basicConfig(level=logging.INFO)


def dummy_tables(*args):
    """Table parser stub: every visualization relates to the same single table."""
    return [TableRelationData('some_db', 'prod', 'public', 'users')]


class MockApiResponse:
    """Minimal stand-in for a successful `requests` response carrying JSON data."""

    def __init__(self, data):
        self.json_data = data
        self.status_code = 200

    def json(self):
        return self.json_data

    def raise_for_status(self):
        pass


class TestRedashDashboardExtractor(unittest.TestCase):
    def test_table_relation_data(self):
        tr = TableRelationData('db', 'cluster', 'schema', 'tbl')
        self.assertEqual(tr.key, 'db://cluster.schema/tbl')

    def test_with_one_dashboard(self):

        def mock_api_get(url, *args, **kwargs):
            # the per-dashboard request has the slug in its URL; anything else is the listing
            if 'test-dash' in url:
                return MockApiResponse({
                    'id': 123,
                    'widgets': [
                        {
                            'visualization': {
                                'query': {
                                    'data_source_id': 1,
                                    'id': 1234,
                                    'name': 'Test Query',
                                    'query': 'SELECT id FROM users'
                                }
                            },
                            'options': {}
                        }
                    ]
                })

            return MockApiResponse({
                'page': 1,
                'count': 1,
                'page_size': 50,
                'results': [
                    {
                        'id': 123,
                        'name': 'Test Dash',
                        'slug': 'test-dash',
                        'created_at': '2020-01-01T00:00:00.000Z',
                        'updated_at': '2020-01-02T00:00:00.000Z',
                        'is_archived': False,
                        'is_draft': False,
                        'user': {'email': 'asdf@example.com'}
                    }
                ]
            })

        redash_base_url = 'https://redash.example.com'
        config = ConfigFactory.from_dict({
            'extractor.redash_dashboard.redash_base_url': redash_base_url,
            'extractor.redash_dashboard.api_base_url': redash_base_url,  # probably not but doesn't matter
            'extractor.redash_dashboard.api_key': 'abc123',
            'extractor.redash_dashboard.table_parser':
                'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
        })

        with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get:
            mock_get.side_effect = mock_api_get

            extractor = RedashDashboardExtractor()
            extractor.init(Scoped.get_scoped_conf(conf=config, scope=extractor.get_scope()))

            # DashboardMetadata
            record = extractor.extract()
            self.assertEqual(record.dashboard_id, 123)
            self.assertEqual(record.dashboard_name, 'Test Dash')
            self.assertEqual(record.dashboard_group_id, RedashDashboardExtractor.DASHBOARD_GROUP_ID)
            self.assertEqual(record.dashboard_group, RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
            self.assertEqual(record.product, RedashDashboardExtractor.PRODUCT)
            self.assertEqual(record.cluster, RedashDashboardExtractor.DEFAULT_CLUSTER)
            self.assertEqual(record.created_timestamp, 1577836800)
            self.assertIn(redash_base_url, record.dashboard_url)
            self.assertIn('test-dash', record.dashboard_url)

            # DashboardLastModified
            record = extractor.extract()
            identity = {
                'dashboard_id': 123,
                'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                'product': RedashDashboardExtractor.PRODUCT,
                'cluster': u'prod'
            }
            expected = DashboardLastModifiedTimestamp(
                last_modified_timestamp=1577923200,
                **identity
            )
            self.assertEqual(repr(record), repr(expected))

            # DashboardOwner
            record = extractor.extract()
            expected = DashboardOwner(email='asdf@example.com', **identity)
            self.assertEqual(repr(record), repr(expected))

            # DashboardQuery
            record = extractor.extract()
            expected = DashboardQuery(
                query_id=1234,
                query_name='Test Query',
                url=u'{base}/queries/1234'.format(base=redash_base_url),
                query_text='SELECT id FROM users',
                **identity
            )
            self.assertEqual(repr(record), repr(expected))

            # DashboardTable
            record = extractor.extract()
            expected = DashboardTable(
                table_ids=[TableRelationData('some_db', 'prod', 'public', 'users').key],
                **identity
            )
            self.assertEqual(repr(record), repr(expected))


if __name__ == '__main__':
    unittest.main()
import logging
import random
import unittest

from mock import patch

from databuilder.rest_api.base_rest_api_query import EmptyRestApiQuerySeed
from databuilder.extractor.dashboard.redash.redash_dashboard_utils import \
    get_text_widgets, get_visualization_widgets, sort_widgets, \
    generate_dashboard_description, get_auth_headers, RedashPaginatedRestApiQuery

logging.basicConfig(level=logging.INFO)


class TestRedashDashboardUtils(unittest.TestCase):
    def test_sort_widgets(self):
        widgets = [
            {
                'text': 'a',
                'options': {}
            },
            {
                'text': 'b',
                'options': {'position': {'row': 1, 'col': 1}}
            },
            {
                'text': 'c',
                'options': {'position': {'row': 1, 'col': 2}}
            },
            {
                'text': 'd',
                'options': {'position': {'row': 2, 'col': 1}}
            }
        ]
        random.shuffle(widgets)
        sorted_widgets = sort_widgets(widgets)
        self.assertListEqual([widget['text'] for widget in sorted_widgets], ['a', 'b', 'c', 'd'])

    def test_widget_filters(self):
        widgets = [
            {'text': 'asdf', 'options': {'ex': 1}},
            {'text': 'asdf', 'options': {'ex': 2}},
            {'visualization': {}, 'options': {'ex': 1}},
            {'visualization': {}, 'options': {'ex': 2}},
            {'visualization': {}, 'options': {'ex': 3}}
        ]
        self.assertEqual(len(get_text_widgets(widgets)), 2)
        self.assertEqual(len(get_visualization_widgets(widgets)), 3)

    def test_text_widget_props(self):
        widget_data = {
            'text': 'asdf'
        }
        widget = get_text_widgets([widget_data])[0]
        self.assertEqual(widget.text, 'asdf')

    def test_visualization_widget_props(self):
        widget_data = {
            'visualization': {
                'query': {
                    'id': 123,
                    'data_source_id': 1,
                    'query': 'SELECT 2+2 FROM DUAL',
                    'name': 'Test'
                }
            }
        }
        widget = get_visualization_widgets([widget_data])[0]

        self.assertEqual(widget.query_id, 123)
        self.assertEqual(widget.data_source_id, 1)
        self.assertEqual(widget.raw_query, 'SELECT 2+2 FROM DUAL')
        self.assertEqual(widget.query_name, 'Test')

    def test_descriptions_from_text(self):
        text_widgets = get_text_widgets([
            {'text': 'T1'},
            {'text': 'T2'}
        ])
        viz_widgets = get_visualization_widgets([
            {
                'visualization': {
                    'query': {
                        'id': 1,
                        'data_source_id': 1,
                        'name': 'Q1',
                        'query': 'n/a'
                    }
                }
            },
            {
                'visualization': {
                    'query': {
                        'id': 2,
                        'data_source_id': 1,
                        'name': 'Q2',
                        'query': 'n/a'
                    }
                }
            }
        ])

        # both text and viz widgets
        desc1 = generate_dashboard_description(text_widgets, viz_widgets)
        self.assertIn('T1', desc1)
        self.assertIn('T2', desc1)
        self.assertNotIn('Q1', desc1)

        # only text widgets
        desc2 = generate_dashboard_description(text_widgets, [])
        self.assertEqual(desc1, desc2)

        # only viz widgets
        desc3 = generate_dashboard_description([], viz_widgets)
        self.assertIn('Q1', desc3)
        self.assertIn('Q2', desc3)

        # no widgets
        desc4 = generate_dashboard_description([], [])
        self.assertIn('empty', desc4)

    def test_auth_headers(self):
        headers = get_auth_headers('testkey')
        self.assertIn('testkey', headers['Authorization'])

    def test_paginated_rest_api_query(self):
        paged_content = [
            {
                'page': 1,
                'page_size': 5,
                'count': 12,
                'results': [{'test': True}] * 5
            },
            {
                'page': 2,
                'page_size': 5,
                'count': 12,
                'results': [{'test': True}] * 5
            },
            {
                'page': 3,
                'page_size': 5,
                'count': 12,
                'results': [{'test': True}] * 2
            },
            {
                'page': 4,
                'page_size': 5,
                'count': 12,
                'results': []
            }
        ]

        with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get:
            # .json() is called twice (ugh), so we have to double each page
            mock_get.return_value.json.side_effect = [page for page in paged_content for _ in range(2)]

            q = RedashPaginatedRestApiQuery(query_to_join=EmptyRestApiQuerySeed(),
                                            url='example.com',
                                            json_path='results[*].[test]',
                                            params={},
                                            field_names=['test'],
                                            skip_no_result=True)
            n_records = 0
            for record in q.execute():
                self.assertEqual(record['test'], True)
                n_records += 1

            self.assertEqual(n_records, 12)


if __name__ == '__main__':
    unittest.main()
self.assertListEqual(expected, result) + def test_empty_rest_api_query_seed(self): + rest_api_query = EmptyRestApiQuerySeed() + + result = [v for v in rest_api_query.execute()] + assert len(result) == 1 + def test_rest_api_query(self): seed_record = [{'foo1': 'bar1'},