diff --git a/UPDATING.md b/UPDATING.md index 3991b70838441..753ec05d0ebc8 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -3,6 +3,14 @@ This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. +## Superset 0.27.0 +* Superset 0.27 start to use nested layout for dashboard builder, which is not +backward-compatible with earlier dashboard grid data. We provide migration script +to automatically convert dashboard grid to nested layout data. To be safe, please +take a database backup prior to this upgrade. It's the only way people could go +back to a previous state. + + ## Superset 0.26.0 * Superset 0.26.0 deprecates the `superset worker` CLI, which is a simple wrapper around the `celery worker` command, forcing you into crafting diff --git a/superset/migrations/versions/bebcf3fed1fe_convert_dashboard_v1_positions.py b/superset/migrations/versions/bebcf3fed1fe_convert_dashboard_v1_positions.py new file mode 100644 index 0000000000000..aa2d95d0d5144 --- /dev/null +++ b/superset/migrations/versions/bebcf3fed1fe_convert_dashboard_v1_positions.py @@ -0,0 +1,663 @@ +"""Migrate dashboard position_json data from V1 to V2 + +Revision ID: bebcf3fed1fe +Revises: fc480c87706c +Create Date: 2018-07-22 11:59:07.025119 + +""" + +# revision identifiers, used by Alembic. +import collections +import json +import sys +from functools import reduce +import uuid + +from alembic import op +import sqlalchemy as sa +from sqlalchemy import ( + Table, Column, + Integer, String, Text, ForeignKey, +) +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship + +from superset import db + +revision = 'bebcf3fed1fe' +down_revision = 'fc480c87706c' + +Base = declarative_base() + +BACKGROUND_TRANSPARENT = 'BACKGROUND_TRANSPARENT' +CHART_TYPE = 'DASHBOARD_CHART_TYPE' +COLUMN_TYPE = 'DASHBOARD_COLUMN_TYPE' +DASHBOARD_GRID_ID = 'DASHBOARD_GRID_ID' +DASHBOARD_GRID_TYPE = 'DASHBOARD_GRID_TYPE' +DASHBOARD_HEADER_ID = 'DASHBOARD_HEADER_ID' +DASHBOARD_HEADER_TYPE = 'DASHBOARD_HEADER_TYPE' +DASHBOARD_ROOT_ID = 'DASHBOARD_ROOT_ID' +DASHBOARD_ROOT_TYPE = 'DASHBOARD_ROOT_TYPE' +DASHBOARD_VERSION_KEY = 'DASHBOARD_VERSION_KEY' +MARKDOWN_TYPE = 'DASHBOARD_MARKDOWN_TYPE' +ROW_TYPE = 'DASHBOARD_ROW_TYPE' + +GRID_COLUMN_COUNT = 12 +GRID_MIN_COLUMN_COUNT = 1 +GRID_MIN_ROW_UNITS = 5 +GRID_RATIO = 4.0 +NUMBER_OF_CHARTS_PER_ROW = 3 +MAX_RECURSIVE_LEVEL = 6 +ROW_HEIGHT = 8 +TOTAL_COLUMNS = 48 +DEFAULT_CHART_WIDTH = int(TOTAL_COLUMNS / NUMBER_OF_CHARTS_PER_ROW) +MAX_VALUE = sys.maxsize + + +class Slice(Base): + """Declarative class to do query in upgrade""" + __tablename__ = 'slices' + id = Column(Integer, primary_key=True) + slice_name = Column(String(250)) + params = Column(Text) + viz_type = Column(String(250)) + + +dashboard_slices = Table( + 'dashboard_slices', Base.metadata, + Column('id', Integer, primary_key=True), + Column('dashboard_id', Integer, ForeignKey('dashboards.id')), + Column('slice_id', Integer, ForeignKey('slices.id')), +) + + +class Dashboard(Base): + """Declarative class to do query in upgrade""" + __tablename__ = 'dashboards' + id = sa.Column(sa.Integer, primary_key=True) + dashboard_title = sa.Column(String(500)) + position_json = sa.Column(sa.Text) + slices = relationship( + 'Slice', secondary=dashboard_slices, backref='dashboards') + + +def is_v2_dash(positions): + return ( + isinstance(positions, dict) and + positions.get('DASHBOARD_VERSION_KEY') == 'v2' + ) + + +def get_boundary(positions): + top = MAX_VALUE + left = MAX_VALUE + bottom = 0 + right = 0 + + for position in positions: + top = min(position['row'], top) + left = min(position['col'], left) + bottom = max(position['row'] + position['size_y'], bottom) + right = max(position['col'] + position['size_x'], right) + + return { + 'top': top, + 'bottom': bottom, + 'left': left, + 'right': right, + } + + +def generate_id(): + return uuid.uuid4().hex[:8] + + +def has_overlap(positions, xAxis=True): + sorted_positions = \ + sorted(positions[:], key=lambda pos: pos['col']) \ + if xAxis else sorted(positions[:], key=lambda pos: pos['row']) + + result = False + for idx, position in enumerate(sorted_positions): + if idx < len(sorted_positions) - 1: + if xAxis: + result = position['col'] + position['size_x'] > \ + sorted_positions[idx + 1]['col'] + else: + result = position['row'] + position['size_y'] > \ + sorted_positions[idx + 1]['row'] + if result: + break + + return result + + +def get_empty_layout(): + return { + DASHBOARD_VERSION_KEY: 'v2', + DASHBOARD_ROOT_ID: { + 'type': DASHBOARD_ROOT_TYPE, + 'id': DASHBOARD_ROOT_ID, + 'children': [DASHBOARD_GRID_ID], + }, + DASHBOARD_GRID_ID: { + 'type': DASHBOARD_GRID_TYPE, + 'id': DASHBOARD_GRID_ID, + 'children': [], + }, + } + + +def get_header_component(title): + return { + 'id': DASHBOARD_HEADER_ID, + 'type': DASHBOARD_HEADER_TYPE, + 'meta': { + 'text': title, + }, + } + + +def get_row_container(): + return { + 'type': ROW_TYPE, + 'id': 'DASHBOARD_ROW_TYPE-{}'.format(generate_id()), + 'children': [], + 'meta': { + 'background': BACKGROUND_TRANSPARENT, + }, + } + + +def get_col_container(): + return { + 'type': COLUMN_TYPE, + 'id': 'DASHBOARD_COLUMN_TYPE-{}'.format(generate_id()), + 'children': [], + 'meta': { + 'background': BACKGROUND_TRANSPARENT, + }, + } + + +def get_chart_holder(position): + size_x = position['size_x'] + size_y = position['size_y'] + slice_id = position['slice_id'] + slice_name = position.get('slice_name') + code = position.get('code') + + width = max( + GRID_MIN_COLUMN_COUNT, + int(round(size_x / GRID_RATIO)) + ) + height = max( + GRID_MIN_ROW_UNITS, + int(round(((size_y / GRID_RATIO) * 100) / ROW_HEIGHT)) + ) + if code is not None: + markdown_content = ' ' # white-space markdown + if len(code): + markdown_content = code + elif slice_name.strip(): + markdown_content = '##### {}'.format(slice_name) + + return { + 'type': MARKDOWN_TYPE, + 'id': 'DASHBOARD_MARKDOWN_TYPE-{}'.format(generate_id()), + 'children': [], + 'meta': { + 'width': width, + 'height': height, + 'code': markdown_content, + } + } + + return { + 'type': CHART_TYPE, + 'id': 'DASHBOARD_CHART_TYPE-{}'.format(generate_id()), + 'children': [], + 'meta': { + 'width': width, + 'height': height, + 'chartId': int(slice_id), + }, + } + + +def get_children_max(children, attr, root): + return max([root[childId]['meta'][attr] for childId in children]) + + +def get_children_sum(children, attr, root): + return reduce( + (lambda sum, childId: sum + root[childId]['meta'][attr]), + children, + 0 + ) + + +# find column that: width > 2 and +# each row has at least 1 chart can reduce width +def get_wide_column_ids(children, root): + return list( + filter( + lambda childId: can_reduce_column_width(root[childId], root), + children + ) + ) + + +def is_wide_leaf_component(component): + return ( + component['type'] in [CHART_TYPE, MARKDOWN_TYPE] and + component['meta']['width'] > GRID_MIN_COLUMN_COUNT + ) + + +def can_reduce_column_width(column_component, root): + return ( + column_component['type'] == COLUMN_TYPE and + column_component['meta']['width'] > GRID_MIN_COLUMN_COUNT and + all([ + is_wide_leaf_component(root[childId]) or ( + root[childId]['type'] == ROW_TYPE and + all([ + is_wide_leaf_component(root[id]) for id in root[childId]['children'] + ]) + ) for childId in column_component['children'] + ]) + ) + + +def reduce_row_width(row_component, root): + wide_leaf_component_ids = list( + filter( + lambda childId: is_wide_leaf_component(root[childId]), + row_component['children'] + ) + ) + + widest_chart_id = None + widest_width = 0 + for component_id in wide_leaf_component_ids: + if root[component_id]['meta']['width'] > widest_width: + widest_width = root[component_id]['meta']['width'] + widest_chart_id = component_id + + if widest_chart_id: + root[widest_chart_id]['meta']['width'] -= 1 + + return get_children_sum(row_component['children'], 'width', root) + + +def reduce_component_width(component): + if is_wide_leaf_component(component): + component['meta']['width'] -= 1 + return component['meta']['width'] + + +def convert(positions, level, parent, root): + if len(positions) == 0: + return + + if len(positions) == 1 or level >= MAX_RECURSIVE_LEVEL: + # special treatment for single chart dash: + # always wrap chart inside a row + if parent['type'] == DASHBOARD_GRID_TYPE: + row_container = get_row_container() + root[row_container['id']] = row_container + parent['children'].append(row_container['id']) + parent = row_container + + chart_holder = get_chart_holder(positions[0]) + root[chart_holder['id']] = chart_holder + parent['children'].append(chart_holder['id']) + return + + current_positions = positions[:] + boundary = get_boundary(current_positions) + top = boundary['top'] + bottom = boundary['bottom'] + left = boundary['left'] + right = boundary['right'] + + # find row dividers + layers = [] + current_row = top + 1 + while len(current_positions) and current_row <= bottom: + upper = [] + lower = [] + + is_row_divider = True + for position in current_positions: + row = position['row'] + size_y = position['size_y'] + if row + size_y <= current_row: + lower.append(position) + continue + elif row >= current_row: + upper.append(position) + continue + is_row_divider = False + break + + if is_row_divider: + current_positions = upper[:] + layers.append(lower) + current_row += 1 + + # Each layer is a list of positions belong to same row section + # they can be a list of charts, or arranged in columns, or mixed + for layer in layers: + if len(layer) == 0: + return + + if len(layer) == 1 and parent['type'] == COLUMN_TYPE: + chart_holder = get_chart_holder(layer[0]) + root[chart_holder['id']] = chart_holder + parent['children'].append(chart_holder['id']) + return + + # create a new row + row_container = get_row_container() + root[row_container['id']] = row_container + parent['children'].append(row_container['id']) + + current_positions = layer[:] + if not has_overlap(current_positions): + # this is a list of charts in the same row + sorted_by_col = sorted( + current_positions, + key=lambda pos: pos['col'], + ) + for position in sorted_by_col: + chart_holder = get_chart_holder(position) + root[chart_holder['id']] = chart_holder + row_container['children'].append(chart_holder['id']) + else: + # this row has columns, find col dividers + current_col = left + 1 + while len(current_positions) and current_col <= right: + upper = [] + lower = [] + + is_col_divider = True + for position in current_positions: + col = position['col'] + size_x = position['size_x'] + if col + size_x <= current_col: + lower.append(position) + continue + elif col >= current_col: + upper.append(position) + continue + is_col_divider = False + break + + if is_col_divider: + # is single chart in the column: + # add to parent container without create new column container + if len(lower) == 1: + chart_holder = get_chart_holder(lower[0]) + root[chart_holder['id']] = chart_holder + row_container['children'].append(chart_holder['id']) + else: + # create new col container + col_container = get_col_container() + root[col_container['id']] = col_container + + if not has_overlap(lower, False): + sorted_by_row = sorted( + lower, + key=lambda pos: pos['row'], + ) + for position in sorted_by_row: + chart_holder = get_chart_holder(position) + root[chart_holder['id']] = chart_holder + col_container['children'].append(chart_holder['id']) + else: + convert(lower, level + 2, col_container, root) + + # add col meta + if len(col_container['children']): + row_container['children'].append(col_container['id']) + col_container['meta']['width'] = get_children_max( + col_container['children'], + 'width', + root, + ) + + current_positions = upper[:] + current_col += 1 + + # add row meta + row_container['meta']['width'] = get_children_sum( + row_container['children'], + 'width', + root, + ) + + +def convert_to_layout(positions): + root = get_empty_layout() + + convert(positions, 0, root[DASHBOARD_GRID_ID], root) + + # remove row's width, height and col's height from its meta data + # and make sure every row's width <= GRID_COLUMN_COUNT + # Each item is a dashboard component: + # row_container, or col_container, or chart_holder + for item in root.values(): + if not isinstance(item, dict): + continue + + if ROW_TYPE == item['type']: + meta = item['meta'] + if meta.get('width', 0) > GRID_COLUMN_COUNT: + current_width = meta['width'] + while ( + current_width > GRID_COLUMN_COUNT and + len(list(filter( + lambda childId: is_wide_leaf_component(root[childId]), + item['children'], + ))) + ): + current_width = reduce_row_width(item, root) + + # because we round v1 chart size to nearest v2 grids count, result + # in there might be overall row width > GRID_COLUMN_COUNT. + # So here is an extra step to check row width, and reduce chart + # or column width if needed and if possible. + if current_width > GRID_COLUMN_COUNT: + has_wide_columns = True + while has_wide_columns: + col_ids = get_wide_column_ids(item['children'], root) + idx = 0 + # need 2nd loop since same column may reduce multiple times + while idx < len(col_ids) and current_width > GRID_COLUMN_COUNT: + current_column = col_ids[idx] + for childId in root[current_column]['children']: + if root[childId]['type'] == ROW_TYPE: + root[childId]['meta']['width'] = reduce_row_width( + root[childId], root + ) + else: + root[childId]['meta']['width'] = \ + reduce_component_width(root[childId]) + + root[current_column]['meta']['width'] = get_children_max( + root[current_column]['children'], + 'width', + root + ) + current_width = get_children_sum( + item['children'], + 'width', + root + ) + idx += 1 + + has_wide_columns = ( + len(get_wide_column_ids(item['children'], root)) and + current_width > GRID_COLUMN_COUNT + ) + + meta.pop('width', None) + + return root + + +def merge_position(position, bottom_line, last_column_start): + col = position['col'] + size_x = position['size_x'] + size_y = position['size_y'] + end_column = len(bottom_line) \ + if col + size_x > last_column_start \ + else col + size_x + + # finding index where index >= col and bottom_line value > bottom_line[col] + taller_indexes = [i for i, value in enumerate(bottom_line) + if (i >= col and value > bottom_line[col])] + + current_row_value = bottom_line[col] + # if no enough space to fit current position, will start from taller row value + if len(taller_indexes) > 0 and (taller_indexes[0] - col + 1) < size_x: + current_row_value = max(bottom_line[col:col + size_x]) + + # add current row value with size_y of this position + for i in range(col, end_column): + bottom_line[i] = current_row_value + size_y + + +# In original position data, a lot of position's row attribute are problematic, +# for example, same positions are assigned to more than 1 chart. +# The convert function depends on row id, col id to split the whole dashboard into +# nested rows and columns. Bad row id will lead to many empty spaces, or a few charts +# are overlapped in the same row. +# This function read positions by row first. +# Then based on previous col id, width and height attribute, +# re-calculate next position's row id. +def scan_dashboard_positions_data(positions): + positions_by_row_id = {} + for position in positions: + row = position['row'] + position['col'] = min(position['col'], TOTAL_COLUMNS) + if not positions_by_row_id.get(row): + positions_by_row_id[row] = [] + positions_by_row_id[row].append(position) + + bottom_line = [0] * (TOTAL_COLUMNS + 1) + # col index always starts from 1, set a large number for [0] as placeholder + bottom_line[0] = MAX_VALUE + last_column_start = max([position['col'] for position in positions]) + + # ordered_raw_positions are arrays of raw positions data sorted by row id + ordered_raw_positions = [] + row_ids = sorted(positions_by_row_id.keys()) + for row_id in row_ids: + ordered_raw_positions.append(positions_by_row_id[row_id]) + updated_positions = [] + + while len(ordered_raw_positions): + next_row = ordered_raw_positions.pop(0) + next_col = 1 + while len(next_row): + # special treatment for same (row, col) assigned to more than 1 chart: + # add one additional row and display wider chart first + available_columns_index = [i for i, e in enumerate( + list(filter(lambda x: x['col'] == next_col, next_row)))] + + if len(available_columns_index): + idx = available_columns_index[0] + if len(available_columns_index) > 1: + idx = sorted( + available_columns_index, + key=lambda x: next_row[x]['size_x'], + reverse=True + )[0] + + next_position = next_row.pop(idx) + merge_position(next_position, bottom_line, last_column_start + 1) + next_position['row'] = \ + bottom_line[next_position['col']] - next_position['size_y'] + updated_positions.append(next_position) + next_col += next_position['size_x'] + else: + next_col = next_row[0]['col'] + + return updated_positions + + +def upgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + dashboards = session.query(Dashboard).all() + for i, dashboard in enumerate(dashboards): + print('scanning dashboard ({}/{}) >>>>'.format(i + 1, len(dashboards))) + position_json = json.loads(dashboard.position_json or '[]') + if not is_v2_dash(position_json): + print('Converting dashboard... dash_id: {}'.format(dashboard.id)) + position_dict = {} + positions = [] + slices = dashboard.slices + + if position_json: + # scan and fix positions data: extra spaces, dup rows, .etc + position_json = scan_dashboard_positions_data(position_json) + position_dict = \ + {str(position['slice_id']): position for position in position_json} + + last_row_id = max([pos['row'] + pos['size_y'] for pos in position_json]) \ + if position_json else 0 + new_slice_counter = 0 + for slice in slices: + position = position_dict.get(str(slice.id)) + + # some dashboard didn't have position_json + # place 3 charts in a row + if not position: + position = { + 'col': ( + new_slice_counter % NUMBER_OF_CHARTS_PER_ROW * + DEFAULT_CHART_WIDTH + 1 + ), + 'row': ( + last_row_id + + int(new_slice_counter / NUMBER_OF_CHARTS_PER_ROW) * + DEFAULT_CHART_WIDTH + ), + 'size_x': DEFAULT_CHART_WIDTH, + 'size_y': DEFAULT_CHART_WIDTH, + 'slice_id': str(slice.id), + } + new_slice_counter += 1 + + # attach additional parameters to position dict, + # prepare to replace markup and separator viz_type + # to dashboard UI component + form_data = json.loads(slice.params or '{}') + viz_type = slice.viz_type + if form_data and viz_type in ['markup', 'separator']: + position['code'] = form_data.get('code') + position['slice_name'] = slice.slice_name + + positions.append(position) + + v2_layout = convert_to_layout(positions) + v2_layout[DASHBOARD_HEADER_ID] = get_header_component(dashboard.dashboard_title) + + sorted_by_key = collections.OrderedDict(sorted(v2_layout.items())) + # print('converted position_json:\n {}'.format(json.dumps(sorted_by_key, indent=2))) + dashboard.position_json = json.dumps(sorted_by_key, indent=2) + session.merge(dashboard) + session.commit() + else: + print('Skip converted dash_id: {}'.format(dashboard.id)) + + session.close() + + +def downgrade(): + print('downgrade is done') diff --git a/superset/migrations/versions/c18bd4186f15_.py b/superset/migrations/versions/c18bd4186f15_.py new file mode 100644 index 0000000000000..cecb6f5422aa2 --- /dev/null +++ b/superset/migrations/versions/c18bd4186f15_.py @@ -0,0 +1,22 @@ +"""empty message + +Revision ID: c18bd4186f15 +Revises: ('46ba6aaaac97', 'ec1f88a35cc6') +Create Date: 2018-07-24 14:29:41.341098 + +""" + +# revision identifiers, used by Alembic. +revision = 'c18bd4186f15' +down_revision = ('46ba6aaaac97', 'ec1f88a35cc6') + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + pass + + +def downgrade(): + pass diff --git a/superset/migrations/versions/ec1f88a35cc6_.py b/superset/migrations/versions/ec1f88a35cc6_.py new file mode 100644 index 0000000000000..e84c6215a9426 --- /dev/null +++ b/superset/migrations/versions/ec1f88a35cc6_.py @@ -0,0 +1,22 @@ +"""empty message + +Revision ID: ec1f88a35cc6 +Revises: ('bebcf3fed1fe', '705732c70154') +Create Date: 2018-07-23 11:18:11.866106 + +""" + +# revision identifiers, used by Alembic. +revision = 'ec1f88a35cc6' +down_revision = ('bebcf3fed1fe', '705732c70154') + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + pass + + +def downgrade(): + pass diff --git a/superset/migrations/versions/fc480c87706c_.py b/superset/migrations/versions/fc480c87706c_.py new file mode 100644 index 0000000000000..85fbdf2fc192b --- /dev/null +++ b/superset/migrations/versions/fc480c87706c_.py @@ -0,0 +1,22 @@ +"""empty message + +Revision ID: fc480c87706c +Revises: ('4451805bbaa1', '1d9e835a84f9') +Create Date: 2018-07-22 11:50:54.174443 + +""" + +# revision identifiers, used by Alembic. +revision = 'fc480c87706c' +down_revision = ('4451805bbaa1', '1d9e835a84f9') + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + pass + + +def downgrade(): + pass