Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WiP] feat: backend for thumbnail computation + caching + endpoints #7716

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions superset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def get_manifest():
# Setup the cache prior to registering the blueprints.
cache = setup_cache(app, conf.get("CACHE_CONFIG"))
tables_cache = setup_cache(app, conf.get("TABLE_NAMES_CACHE_CONFIG"))
thumbnail_cache = setup_cache(app, conf.get("THUMBNAIL_CACHE_CONFIG"))

for bp in conf.get("BLUEPRINTS"):
try:
Expand All @@ -120,6 +121,10 @@ def get_manifest():
if conf.get("SILENCE_FAB"):
logging.getLogger("flask_appbuilder").setLevel(logging.ERROR)

logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("selenium").setLevel(logging.ERROR)
logging.getLogger("PIL").setLevel(logging.ERROR)

if app.debug:
app.logger.setLevel(logging.DEBUG) # pylint: disable=no-member
else:
Expand Down
4 changes: 2 additions & 2 deletions superset/assets/stylesheets/less/cosmo/variables.less
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
@gray-darker: lighten(@gray-base, 13.5%);
@gray-dark: lighten(@gray-base, 20%);
@gray: lighten(@gray-base, 33.5%);
@gray-light: lighten(@gray-base, 70%);
@gray-lighter: lighten(@gray-base, 95%);
@gray-light: lighten(@gray-base, 80%);
@gray-lighter: lighten(@gray-base, 90%);

@brand-primary: #00A699;
@brand-success: #4AC15F;
Expand Down
69 changes: 69 additions & 0 deletions superset/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,75 @@ def load_test_users():
load_test_users_run()


def _compute_thumbnails_for(
    model, kind, title_attr, task, ids, asynchronous, force
):
    """Compute (or enqueue) the thumbnail of every ``model`` row.

    :param model: mapped class to query; must expose an ``id`` column
    :param kind: word used in the progress message ("dashboard" or "chart")
    :param title_attr: attribute of ``model`` holding the display title
    :param task: thumbnail task; called directly, or through ``task.delay``
        when ``asynchronous`` is set
    :param ids: ids to restrict the query to; an empty value means all rows
    :param asynchronous: enqueue through ``task.delay`` instead of calling inline
    :param force: forwarded to the task as the ``force`` keyword argument
    """
    query = db.session.query(model)
    if ids:
        query = query.filter(model.id.in_(ids))
    objects = query.all()
    count = len(objects)

    # The execution mode is identical for every row, so resolve it once
    # instead of on each loop iteration.
    if asynchronous:
        func, action = task.delay, "Triggering"
    else:
        func, action = task, "Processing"

    for i, obj in enumerate(objects, start=1):
        title = getattr(obj, title_attr)
        click.secho(f'{action} {kind} "{title}" ({i}/{count})', fg="green")
        func(obj.id, force=force)


@app.cli.command()
@click.option(
    "--asynchronous",
    "-a",
    is_flag=True,
    default=False,
    help="Trigger commands to run remotely on a worker",
)
@click.option(
    "--dashboards_only",
    "-d",
    is_flag=True,
    default=False,
    help="Only process dashboards",
)
@click.option(
    "--charts_only", "-c", is_flag=True, default=False, help="Only process charts"
)
@click.option(
    "--force",
    "-f",
    is_flag=True,
    default=False,
    help="Force refresh, even if previously cached",
)
@click.option("--id", "-i", multiple=True)
def compute_thumbnails(asynchronous, dashboards_only, charts_only, force, id):
    """Compute thumbnails"""
    # NOTE: the docstring above is kept short on purpose; click shows it as the
    # command's help text. The ``id`` parameter mirrors the ``--id`` option
    # name, which is why it shadows the builtin here.
    #
    # Imports are local to the command, as in the original implementation.
    from superset.models import core as models
    from superset.tasks.thumbnails import (
        cache_chart_thumbnail,
        cache_dashboard_thumbnail,
    )

    # Dashboards are handled first, then charts. The same ``--id`` values are
    # applied to both queries. Passing both ``--dashboards_only`` and
    # ``--charts_only`` processes nothing.
    if not charts_only:
        _compute_thumbnails_for(
            models.Dashboard,
            "dashboard",
            "dashboard_title",
            cache_dashboard_thumbnail,
            id,
            asynchronous,
            force,
        )

    if not dashboards_only:
        _compute_thumbnails_for(
            models.Slice,
            "chart",
            "slice_name",
            cache_chart_thumbnail,
            id,
            asynchronous,
            force,
        )


def load_test_users_run():
"""
Loads admin, alpha, and gamma user for testing purposes
Expand Down
2 changes: 2 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@
# you'll want to use a proper broker as specified here:
# http://docs.celeryproject.org/en/latest/getting-started/brokers/index.html

CELERYD_LOG_LEVEL = "DEBUG"


class CeleryConfig(object):
BROKER_URL = "sqla+sqlite:///celerydb.sqlite"
Expand Down
10 changes: 10 additions & 0 deletions superset/connectors/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ def short_data(self):
def select_star(self):
pass

@property
def data_summary(self):
    """Compact dict describing the datasource: name, type, schema, id, explore URL."""
    # Each key is also the name of the attribute it is read from
    fields = ("datasource_name", "type", "schema", "id", "explore_url")
    return {field: getattr(self, field) for field in fields}

@property
def data(self):
"""Data representation of the datasource sent to the frontend"""
Expand Down
49 changes: 49 additions & 0 deletions superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from superset.models.helpers import AuditMixinNullable, ImportMixin
from superset.models.tags import ChartUpdater, DashboardUpdater, FavStarUpdater
from superset.models.user_attributes import UserAttribute
from superset.tasks.thumbnails import cache_dashboard_thumbnail
from superset.utils import cache as cache_util, core as utils
from superset.viz import viz_types
from urllib import parse # noqa
Expand Down Expand Up @@ -187,6 +188,26 @@ def cls_model(self):
def datasource(self):
return self.get_datasource

@property
def thumbnail_url(self):
    """URL of this chart's thumbnail, versioned by a digest of ``params``."""
    # The digest (computed by ``utils.md5_hex``) is part of the path so that a
    # change in the chart params yields a new URL, bypassing the browser cache
    digest = utils.md5_hex(self.params, 6)
    return f"/thumb/chart/{self.id}/{digest}/"

@property
def thumbnail_img(self):
    """``<img>`` markup (width 75) whose source is this chart's thumbnail URL."""
    src = self.thumbnail_url
    return Markup(f'<img width="75" src="{src}">')

@property
def thumbnail_link(self):
    """Anchor markup wrapping the chart thumbnail image.

    The link target is the thumbnail URL with ``?force=true`` appended.
    """
    # NOTE(review): ``force=true`` presumably asks the endpoint to recompute
    # the thumbnail instead of serving the cached one — confirm in the view
    return Markup(
        f"""
<a href="{self.thumbnail_url}?force=true">
{self.thumbnail_img}
</a>
"""
    )

def clone(self):
return Slice(
slice_name=self.slice_name,
Expand Down Expand Up @@ -711,6 +732,34 @@ def export_dashboards(cls, dashboard_ids):
indent=4,
)

@property
def thumbnail_url(self):
    """URL of this dashboard's thumbnail, versioned by a digest of ``position_json``."""
    # The digest (computed by ``utils.md5_hex``) is part of the path so that a
    # change in the dashboard layout yields a new URL, bypassing the browser
    # cache
    digest = utils.md5_hex(self.position_json, 6)
    return f"/thumb/dashboard/{self.id}/{digest}/"

@property
def thumbnail_img(self):
    """``<img>`` markup (width 150) whose source is this dashboard's thumbnail URL."""
    src = self.thumbnail_url
    return Markup(f'<img width="150" src="{src}">')

@property
def thumbnail_link(self):
    """Anchor markup wrapping the dashboard thumbnail image.

    The link target is the thumbnail URL with ``?force=true`` appended.
    """
    # NOTE(review): ``force=true`` presumably asks the endpoint to recompute
    # the thumbnail instead of serving the cached one — confirm in the view
    return Markup(
        f"""
<a href="{self.thumbnail_url}?force=true">
{self.thumbnail_img}
</a>
"""
    )


def event_after_dashboard_changed(mapper, connection, target):
    """Mapper-event hook: enqueue a forced thumbnail refresh for a dashboard.

    :param mapper: the mapper that is the target of the event (unused)
    :param connection: the connection used to emit the statement (unused)
    :param target: the ``Dashboard`` instance that was inserted or updated
    """
    # NOTE(review): the task is enqueued during the flush, before the
    # surrounding transaction commits; a fast worker may still read the
    # previous state of the row. Confirm whether this should move to a
    # post-commit hook.
    cache_dashboard_thumbnail.delay(target.id, force=True)


# Listen on the "after_*" events, matching the handler's name. With
# "before_insert" a database-generated primary key has not been assigned yet,
# so ``target.id`` could still be None when the task is enqueued.
sqla.event.listen(Dashboard, "after_insert", event_after_dashboard_changed)
sqla.event.listen(Dashboard, "after_update", event_after_dashboard_changed)


class Database(Model, AuditMixinNullable, ImportMixin):

Expand Down
7 changes: 6 additions & 1 deletion superset/tasks/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from sqlalchemy import and_, func

from superset import app, db
from superset.models.core import Dashboard, Log, Slice
from superset.models.tags import Tag, TaggedObject
from superset.tasks.celery_app import app as celery_app
from superset.utils.core import parse_human_datetime
Expand Down Expand Up @@ -132,6 +131,8 @@ class DummyStrategy(Strategy):

def get_urls(self):
    """Return one URL, built by ``get_url``, for every chart in the database."""
    session = db.create_scoped_session()
    # Imported inside the function rather than at module level; presumably
    # this avoids a circular import with ``superset.models.core`` — confirm
    from superset.models.core import Slice

    return [get_url(slc) for slc in session.query(Slice).all()]
Expand Down Expand Up @@ -166,6 +167,8 @@ def get_urls(self):
urls = []
session = db.create_scoped_session()

from superset.models.core import Dashboard, Log

records = (
session.query(Log.dashboard_id, func.count(Log.dashboard_id))
.filter(and_(Log.dashboard_id.isnot(None), Log.dttm >= self.since))
Expand Down Expand Up @@ -223,6 +226,8 @@ def get_urls(self):
)
.all()
)
from superset.models.core import Dashboard, Slice

dash_ids = [tagged_object.object_id for tagged_object in tagged_objects]
tagged_dashboards = session.query(Dashboard).filter(Dashboard.id.in_(dash_ids))
for dashboard in tagged_dashboards:
Expand Down
54 changes: 31 additions & 23 deletions superset/tasks/schedules.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import simplejson as json
from werkzeug.utils import parse_cookie

# Superset framework imports
from superset import app, db, security_manager
from superset.models.schedules import (
EmailDeliveryType,
Expand All @@ -46,7 +45,7 @@
SliceEmailReportFormat,
)
from superset.tasks.celery_app import app as celery_app
from superset.utils.core import get_email_address_list, send_email_smtp
from superset.utils import core as utils

# Globals
config = app.config
Expand All @@ -66,13 +65,13 @@ def _get_recipients(schedule):
to = schedule.recipients
yield (to, bcc)
else:
for to in get_email_address_list(schedule.recipients):
for to in utils.get_email_address_list(schedule.recipients):
yield (to, bcc)


def _deliver_email(schedule, subject, email):
for (to, bcc) in _get_recipients(schedule):
send_email_smtp(
utils.send_email_smtp(
to,
subject,
email.body,
Expand All @@ -85,16 +84,19 @@ def _deliver_email(schedule, subject, email):
)


def _generate_mail_content(schedule, screenshot, name, url):
if schedule.delivery_type == EmailDeliveryType.attachment:
def _generate_mail_content(delivery_type, screenshot, name, url):
config = app.config
if delivery_type == EmailDeliveryType.attachment:
images = None
data = {"screenshot.png": screenshot}
body = __(
'<b><a href="%(url)s">Explore in Superset</a></b><p></p>',
name=name,
url=url,
)
elif schedule.delivery_type == EmailDeliveryType.inline:
else:
# Implicit: delivery_type == EmailDeliveryType.inline:

# Get the domain from the 'From' address ..
# and make a message id without the < > in the ends
domain = parseaddr(config.get("SMTP_MAIL_FROM"))[1].split("@")[1]
Expand Down Expand Up @@ -239,13 +241,10 @@ def deliver_dashboard(schedule):
prefix=config.get("EMAIL_REPORTS_SUBJECT_PREFIX"),
title=dashboard.dashboard_title,
)
_deliver_email(_get_recipients(schedule), subject, email)

_deliver_email(schedule, subject, email)


def _get_slice_data(schedule):
slc = schedule.slice

def _get_slice_data(slc, delivery_type):
slice_url = _get_url_path(
"Superset.explore_json", csv="true", form_data=json.dumps({"slice_id": slc.id})
)
Expand All @@ -266,7 +265,7 @@ def _get_slice_data(schedule):
# TODO: Move to the csv module
rows = [r.split(b",") for r in response.content.splitlines()]

if schedule.delivery_type == EmailDeliveryType.inline:
if delivery_type == EmailDeliveryType.inline:
data = None

# Parse the csv file and generate HTML
Expand All @@ -280,7 +279,7 @@ def _get_slice_data(schedule):
link=url,
)

elif schedule.delivery_type == EmailDeliveryType.attachment:
elif delivery_type == EmailDeliveryType.attachment:
data = {__("%(name)s.csv", name=slc.slice_name): response.content}
body = __(
'<b><a href="%(url)s">Explore in Superset</a></b><p></p>',
Expand Down Expand Up @@ -326,24 +325,25 @@ def _get_slice_visualization(schedule):
return _generate_mail_content(schedule, screenshot, slc.slice_name, slice_url)


def deliver_slice(schedule):
def deliver_slice(slc, recipients, email_format, delivery_type):
"""
Given a schedule, delivery the slice as an email report
"""
if schedule.email_format == SliceEmailReportFormat.data:
email = _get_slice_data(schedule)
elif schedule.email_format == SliceEmailReportFormat.visualization:
email = _get_slice_visualization(schedule)
config = app.config
if email_format == SliceEmailReportFormat.data:
email = _get_slice_data(slc, delivery_type)
elif email_format == SliceEmailReportFormat.visualization:
email = _get_slice_visualization(slc, delivery_type)
else:
raise RuntimeError("Unknown email report format")

subject = __(
"%(prefix)s %(title)s",
prefix=config.get("EMAIL_REPORTS_SUBJECT_PREFIX"),
title=schedule.slice.slice_name,
title=slc.slice_name,
)

_deliver_email(schedule, subject, email)
_deliver_email(recipients, subject, email)


@celery_app.task(name="email_reports.send", bind=True, soft_time_limit=300)
Expand All @@ -362,9 +362,16 @@ def schedule_email_report(task, report_type, schedule_id, recipients=None):
schedule.recipients = recipients

if report_type == ScheduleType.dashboard.value:
deliver_dashboard(schedule)
deliver_dashboard(
schedule.dashboard, _get_recipients(schedule), schedule.delivery_type
)
elif report_type == ScheduleType.slice.value:
deliver_slice(schedule)
deliver_slice(
schedule.slice,
_get_recipients(schedule),
schedule.email_format,
schedule.delivery_type,
)
else:
raise RuntimeError("Unknown report type")

Expand Down Expand Up @@ -412,6 +419,7 @@ def schedule_window(report_type, start_at, stop_at, resolution):
@celery_app.task(name="email_reports.schedule_hourly")
def schedule_hourly():
""" Celery beat job meant to be invoked hourly """
config = app.config

if not config.get("ENABLE_SCHEDULED_EMAIL_REPORTS"):
logging.info("Scheduled email reports not enabled in config")
Expand Down
Loading