From 5a901dd506bc078b02b5320b5d6c837a387f9433 Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Fri, 15 Oct 2021 11:41:09 -0700 Subject: [PATCH] text: add checkbox to control server-side markdown conversion (#5378) * define plugin_util.safe_html() with no markdown interpretation * make plugin_util.safe_html() handle unicode vs bytes clearly * add markdown=false request parameter to disable markdown interpretation * fix bytes to str conversion bug in text plugin no-markdown codepath * text: add checkbox to control server-side markdown conversion * yarn fix-lint --- tensorboard/plugin_util.py | 16 +++++ tensorboard/plugin_util_test.py | 52 ++++++++++++++ tensorboard/plugins/text/text_plugin.py | 42 ++++++++---- tensorboard/plugins/text/text_plugin_test.py | 67 ++++++++++++++++--- .../plugins/text/tf_text_dashboard/BUILD | 1 + .../tf_text_dashboard/tf-text-dashboard.ts | 38 ++++++++++- .../text/tf_text_dashboard/tf-text-loader.ts | 4 ++ 7 files changed, 198 insertions(+), 22 deletions(-) diff --git a/tensorboard/plugin_util.py b/tensorboard/plugin_util.py index 5952fd5d4ca..130dc3a1dd2 100644 --- a/tensorboard/plugin_util.py +++ b/tensorboard/plugin_util.py @@ -90,6 +90,22 @@ def __init__(self): _CLEANER_STORE = _CleanerStore() +def safe_html(unsafe_string): + """Return the input as a str, sanitized for insertion into the DOM. + + Arguments: + unsafe_string: A Unicode string or UTF-8--encoded bytestring + possibly containing unsafe HTML markup. + + Returns: + A string containing safe HTML. + """ + total_null_bytes = 0 + if isinstance(unsafe_string, bytes): + unsafe_string = unsafe_string.decode("utf-8") + return _CLEANER_STORE.cleaner.clean(unsafe_string) + + def markdown_to_safe_html(markdown_string): """Convert Markdown to HTML that's safe to splice into the DOM. diff --git a/tensorboard/plugin_util_test.py b/tensorboard/plugin_util_test.py index c22b03e4ea2..3160a4cc1e4 100644 --- a/tensorboard/plugin_util_test.py +++ b/tensorboard/plugin_util_test.py @@ -22,6 +22,58 @@ from tensorboard.backend import experiment_id +class SafeHTMLTest(tb_test.TestCase): + def test_empty_input(self): + self.assertEqual(plugin_util.safe_html(""), "") + + def test_whitelisted_tags_and_attributes_allowed(self): + s = ( + 'Check out ' + "my website!" + ) + self.assertEqual(plugin_util.safe_html(s), "%s" % s) + + def test_arbitrary_tags_and_attributes_removed(self): + self.assertEqual( + plugin_util.safe_html( + "We should bring back the blink tag; " + '' + "sign the petition!" + ), + "We should bring back the " + "<blink>blink tag</blink>; " + 'sign the petition!', + ) + + def test_javascript_hrefs_sanitized(self): + self.assertEqual( + plugin_util.safe_html( + 'A sketchy link for you' + ), + "A sketchy link for you", + ) + + def test_byte_strings_interpreted_as_utf8(self): + s = "Look\u2014some UTF-8!".encode("utf-8") + assert isinstance(s, bytes), (type(s), bytes) + self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!") + + def test_unicode_strings_passed_through(self): + s = "Look\u2014some UTF-8!" + assert not isinstance(s, bytes), (type(s), bytes) + self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!") + + def test_null_bytes_stripped(self): + # If this function is mistakenly called with UTF-16 or UTF-32 encoded text, + # there will probably be a bunch of null bytes. Ensure these are stripped. + s = "un_der_score".encode("utf-32-le") + # UTF-32 encoding of ASCII will have 3 null bytes per char. 36 = 3 * 12. + self.assertEqual( + plugin_util.safe_html(s), + "un_der_score", + ) + + class MarkdownToSafeHTMLTest(tb_test.TestCase): def _test(self, markdown_string, expected): actual = plugin_util.markdown_to_safe_html(markdown_string) diff --git a/tensorboard/plugins/text/text_plugin.py b/tensorboard/plugins/text/text_plugin.py index 4574115ca3c..daf74b31461 100644 --- a/tensorboard/plugins/text/text_plugin.py +++ b/tensorboard/plugins/text/text_plugin.py @@ -152,7 +152,7 @@ def reduce_to_2d(arr): return arr[slices] -def text_array_to_html(text_arr): +def text_array_to_html(text_arr, enable_markdown): """Take a numpy.ndarray containing strings, and convert it into html. If the ndarray contains a single scalar string, that string is converted to @@ -164,29 +164,42 @@ def text_array_to_html(text_arr): Args: text_arr: A numpy.ndarray containing strings. + enable_markdown: boolean, whether to enable Markdown Returns: The array converted to html. """ if not text_arr.shape: - # It is a scalar. No need to put it in a table, just apply markdown - return plugin_util.markdown_to_safe_html(text_arr.item()) + # It is a scalar. No need to put it in a table. + if enable_markdown: + return plugin_util.markdown_to_safe_html(text_arr.item()) + else: + return plugin_util.safe_html(text_arr.item()) warning = "" if len(text_arr.shape) > 2: warning = plugin_util.markdown_to_safe_html( WARNING_TEMPLATE % len(text_arr.shape) ) text_arr = reduce_to_2d(text_arr) - table = plugin_util.markdowns_to_safe_html( - text_arr.reshape(-1), - lambda xs: make_table(np.array(xs).reshape(text_arr.shape)), - ) + if enable_markdown: + table = plugin_util.markdowns_to_safe_html( + text_arr.reshape(-1), + lambda xs: make_table(np.array(xs).reshape(text_arr.shape)), + ) + else: + # Convert utf-8 bytes to str. The built-in np.char.decode doesn't work on + # object arrays, and converting to an numpy chararray is lossy. + decode = lambda bs: bs.decode("utf-8") if isinstance(bs, bytes) else bs + text_arr_str = np.array( + [decode(bs) for bs in text_arr.reshape(-1)] + ).reshape(text_arr.shape) + table = plugin_util.safe_html(make_table(text_arr_str)) return warning + table -def process_event(wall_time, step, string_ndarray): +def process_event(wall_time, step, string_ndarray, enable_markdown): """Convert a text event into a JSON-compatible response.""" - html = text_array_to_html(string_ndarray) + html = text_array_to_html(string_ndarray, enable_markdown) return { "wall_time": wall_time, "step": step, @@ -242,7 +255,7 @@ def tags_route(self, request): index = self.index_impl(ctx, experiment) return http_util.Respond(request, index, "application/json") - def text_impl(self, ctx, run, tag, experiment): + def text_impl(self, ctx, run, tag, experiment, enable_markdown): all_text = self._data_provider.read_tensors( ctx, experiment_id=experiment, @@ -253,7 +266,10 @@ def text_impl(self, ctx, run, tag, experiment): text = all_text.get(run, {}).get(tag, None) if text is None: return [] - return [process_event(d.wall_time, d.step, d.numpy) for d in text] + return [ + process_event(d.wall_time, d.step, d.numpy, enable_markdown) + for d in text + ] @wrappers.Request.application def text_route(self, request): @@ -261,7 +277,9 @@ def text_route(self, request): experiment = plugin_util.experiment_id(request.environ) run = request.args.get("run") tag = request.args.get("tag") - response = self.text_impl(ctx, run, tag, experiment) + markdown_arg = request.args.get("markdown") + enable_markdown = markdown_arg != "false" # Default to enabled. + response = self.text_impl(ctx, run, tag, experiment, enable_markdown) return http_util.Respond(request, response, "application/json") def get_plugin_apps(self): diff --git a/tensorboard/plugins/text/text_plugin_test.py b/tensorboard/plugins/text/text_plugin_test.py index aeedeb71442..2187f10b681 100644 --- a/tensorboard/plugins/text/text_plugin_test.py +++ b/tensorboard/plugins/text/text_plugin_test.py @@ -76,7 +76,8 @@ def generate_testdata(self, logdir=None): writer.add_summary(summ, global_step=step) step += 1 - vector_message = ["one", "two", "three", "four"] + # Test unicode superscript 4. + vector_message = ["one", "two", "three", "\u2074"] summ = sess.run( vector_summary, feed_dict={placeholder: vector_message} ) @@ -101,22 +102,38 @@ def testIndex(self): def testText(self): plugin = self.load_plugin() fry = plugin.text_impl( - context.RequestContext(), "fry", "message", experiment="123" + context.RequestContext(), + "fry", + "message", + experiment="123", + enable_markdown=True, ) leela = plugin.text_impl( - context.RequestContext(), "leela", "message", experiment="123" + context.RequestContext(), + "leela", + "message", + experiment="123", + enable_markdown=False, ) self.assertEqual(len(fry), 4) self.assertEqual(len(leela), 4) for i in range(4): self.assertEqual(fry[i]["step"], i) self.assertEqual(leela[i]["step"], i) - - table = plugin.text_impl( - context.RequestContext(), "fry", "vector", experiment="123" + self.assertEqual( + fry[i]["text"], "

fry loves %s

" % GEMS[i] + ) + self.assertEqual(leela[i]["text"], "leela *loves* %s" % GEMS[i]) + + md_table = plugin.text_impl( + context.RequestContext(), + "fry", + "vector", + experiment="123", + enable_markdown=True, )[0]["text"] self.assertEqual( - table, + md_table, textwrap.dedent( """\ @@ -131,7 +148,37 @@ def testText(self): - + + + +

three

four

\u2074

+ """.rstrip() + ), + ) + plain_table = plugin.text_impl( + context.RequestContext(), + "fry", + "vector", + experiment="123", + enable_markdown=False, + )[0]["text"] + self.assertEqual( + plain_table, + textwrap.dedent( + """\ + + + + + + + + + + + + +
one
two
three
\u2074
@@ -297,7 +344,9 @@ def make_range_array(dim): np.testing.assert_array_equal(actual, expected) def test_text_array_to_html(self): - convert = text_plugin.text_array_to_html + convert = lambda x: text_plugin.text_array_to_html( + x, enable_markdown=True + ) scalar = np.array("foo") scalar_expected = "

foo

" self.assertEqual(convert(scalar), scalar_expected) diff --git a/tensorboard/plugins/text/tf_text_dashboard/BUILD b/tensorboard/plugins/text/tf_text_dashboard/BUILD index d2199e1c441..37bc28b7321 100644 --- a/tensorboard/plugins/text/tf_text_dashboard/BUILD +++ b/tensorboard/plugins/text/tf_text_dashboard/BUILD @@ -22,6 +22,7 @@ tf_ts_library( "//tensorboard/components/tf_markdown_view", "//tensorboard/components/tf_paginated_view", "//tensorboard/components/tf_runs_selector", + "//tensorboard/components/tf_storage", "@npm//@polymer/decorators", "@npm//@polymer/polymer", "@npm//@types/d3", diff --git a/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts b/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts index 2d25ee5d560..32ccad09d0b 100644 --- a/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts +++ b/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts @@ -30,7 +30,7 @@ import '../../../components/tf_dashboard_common/dashboard-style'; import '../../../components/tf_dashboard_common/tf-dashboard-layout'; import '../../../components/tf_paginated_view/tf-category-paginated-view'; import '../../../components/tf_runs_selector/tf-runs-selector'; - +import * as tf_storage from '../../../components/tf_storage/storage'; import './tf-text-loader'; @customElement('tf-text-dashboard') @@ -38,6 +38,13 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) { static readonly template = html`