From 5a901dd506bc078b02b5320b5d6c837a387f9433 Mon Sep 17 00:00:00 2001
From: Nick Felt <nfelt@users.noreply.github.com>
Date: Fri, 15 Oct 2021 11:41:09 -0700
Subject: [PATCH] text: add checkbox to control server-side markdown conversion
 (#5378)

* define plugin_util.safe_html() with no markdown interpretation

* make plugin_util.safe_html() handle unicode vs bytes clearly

* add markdown=false request parameter to disable markdown interpretation

* fix bytes to str conversion bug in text plugin no-markdown codepath

* text: add checkbox to control server-side markdown conversion

* yarn fix-lint
---
 tensorboard/plugin_util.py                    | 16 +++++
 tensorboard/plugin_util_test.py               | 52 ++++++++++++++
 tensorboard/plugins/text/text_plugin.py       | 42 ++++++++----
 tensorboard/plugins/text/text_plugin_test.py  | 67 ++++++++++++++++---
 .../plugins/text/tf_text_dashboard/BUILD      |  1 +
 .../tf_text_dashboard/tf-text-dashboard.ts    | 38 ++++++++++-
 .../text/tf_text_dashboard/tf-text-loader.ts  |  4 ++
 7 files changed, 198 insertions(+), 22 deletions(-)
diff --git a/tensorboard/plugin_util.py b/tensorboard/plugin_util.py
index 5952fd5d4ca..130dc3a1dd2 100644
--- a/tensorboard/plugin_util.py
+++ b/tensorboard/plugin_util.py
@@ -90,6 +90,22 @@ def __init__(self):
 _CLEANER_STORE = _CleanerStore()
 
 
+def safe_html(unsafe_string):
+    """Return the input as a str, sanitized for insertion into the DOM.
+
+    Arguments:
+      unsafe_string: A Unicode string or UTF-8--encoded bytestring
+        possibly containing unsafe HTML markup.
+
+    Returns:
+      A string containing safe HTML.
+    """
+    total_null_bytes = 0
+    if isinstance(unsafe_string, bytes):
+        unsafe_string = unsafe_string.decode("utf-8")
+    return _CLEANER_STORE.cleaner.clean(unsafe_string)
+
+
 def markdown_to_safe_html(markdown_string):
     """Convert Markdown to HTML that's safe to splice into the DOM.
 
diff --git a/tensorboard/plugin_util_test.py b/tensorboard/plugin_util_test.py
index c22b03e4ea2..3160a4cc1e4 100644
--- a/tensorboard/plugin_util_test.py
+++ b/tensorboard/plugin_util_test.py
@@ -22,6 +22,58 @@
 from tensorboard.backend import experiment_id
 
 
+class SafeHTMLTest(tb_test.TestCase):
+    def test_empty_input(self):
+        self.assertEqual(plugin_util.safe_html(""), "")
+
+    def test_whitelisted_tags_and_attributes_allowed(self):
+        s = (
+            'Check out <a href="http://example.com" title="do it">'
+            "my website</a>!"
+        )
+        self.assertEqual(plugin_util.safe_html(s), "%s" % s)
+
+    def test_arbitrary_tags_and_attributes_removed(self):
+        self.assertEqual(
+            plugin_util.safe_html(
+                "We should bring back the <blink>blink tag</blink>; "
+                '<a name="bookmark" href="http://please-dont.com">'
+                "sign the petition!</a>"
+            ),
+            "We should bring back the "
+            "&lt;blink&gt;blink tag&lt;/blink&gt;; "
+            '<a href="http://please-dont.com">sign the petition!</a>',
+        )
+
+    def test_javascript_hrefs_sanitized(self):
+        self.assertEqual(
+            plugin_util.safe_html(
+                'A <a href="javascript:void0">sketchy link</a> for you'
+            ),
+            "A <a>sketchy link</a> for you",
+        )
+
+    def test_byte_strings_interpreted_as_utf8(self):
+        s = "Look\u2014some UTF-8!".encode("utf-8")
+        assert isinstance(s, bytes), (type(s), bytes)
+        self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!")
+
+    def test_unicode_strings_passed_through(self):
+        s = "Look\u2014some UTF-8!"
+        assert not isinstance(s, bytes), (type(s), bytes)
+        self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!")
+
+    def test_null_bytes_stripped(self):
+        # If this function is mistakenly called with UTF-16 or UTF-32 encoded text,
+        # there will probably be a bunch of null bytes. Ensure these are stripped.
+        s = "un_der_score".encode("utf-32-le")
+        # UTF-32 encoding of ASCII will have 3 null bytes per char. 36 = 3 * 12.
+        self.assertEqual(
+            plugin_util.safe_html(s),
+            "un_der_score",
+        )
+
+
 class MarkdownToSafeHTMLTest(tb_test.TestCase):
     def _test(self, markdown_string, expected):
         actual = plugin_util.markdown_to_safe_html(markdown_string)
diff --git a/tensorboard/plugins/text/text_plugin.py b/tensorboard/plugins/text/text_plugin.py
index 4574115ca3c..daf74b31461 100644
--- a/tensorboard/plugins/text/text_plugin.py
+++ b/tensorboard/plugins/text/text_plugin.py
@@ -152,7 +152,7 @@ def reduce_to_2d(arr):
     return arr[slices]
 
 
-def text_array_to_html(text_arr):
+def text_array_to_html(text_arr, enable_markdown):
     """Take a numpy.ndarray containing strings, and convert it into html.
 
     If the ndarray contains a single scalar string, that string is converted to
@@ -164,29 +164,42 @@ def text_array_to_html(text_arr):
 
     Args:
       text_arr: A numpy.ndarray containing strings.
+      enable_markdown: boolean, whether to enable Markdown
 
     Returns:
       The array converted to html.
     """
     if not text_arr.shape:
-        # It is a scalar. No need to put it in a table, just apply markdown
-        return plugin_util.markdown_to_safe_html(text_arr.item())
+        # It is a scalar. No need to put it in a table.
+        if enable_markdown:
+            return plugin_util.markdown_to_safe_html(text_arr.item())
+        else:
+            return plugin_util.safe_html(text_arr.item())
     warning = ""
     if len(text_arr.shape) > 2:
         warning = plugin_util.markdown_to_safe_html(
             WARNING_TEMPLATE % len(text_arr.shape)
         )
         text_arr = reduce_to_2d(text_arr)
-    table = plugin_util.markdowns_to_safe_html(
-        text_arr.reshape(-1),
-        lambda xs: make_table(np.array(xs).reshape(text_arr.shape)),
-    )
+    if enable_markdown:
+        table = plugin_util.markdowns_to_safe_html(
+            text_arr.reshape(-1),
+            lambda xs: make_table(np.array(xs).reshape(text_arr.shape)),
+        )
+    else:
+        # Convert utf-8 bytes to str. The built-in np.char.decode doesn't work on
+        # object arrays, and converting to an numpy chararray is lossy.
+        decode = lambda bs: bs.decode("utf-8") if isinstance(bs, bytes) else bs
+        text_arr_str = np.array(
+            [decode(bs) for bs in text_arr.reshape(-1)]
+        ).reshape(text_arr.shape)
+        table = plugin_util.safe_html(make_table(text_arr_str))
     return warning + table
 
 
-def process_event(wall_time, step, string_ndarray):
+def process_event(wall_time, step, string_ndarray, enable_markdown):
     """Convert a text event into a JSON-compatible response."""
-    html = text_array_to_html(string_ndarray)
+    html = text_array_to_html(string_ndarray, enable_markdown)
     return {
         "wall_time": wall_time,
         "step": step,
@@ -242,7 +255,7 @@ def tags_route(self, request):
         index = self.index_impl(ctx, experiment)
         return http_util.Respond(request, index, "application/json")
 
-    def text_impl(self, ctx, run, tag, experiment):
+    def text_impl(self, ctx, run, tag, experiment, enable_markdown):
         all_text = self._data_provider.read_tensors(
             ctx,
             experiment_id=experiment,
@@ -253,7 +266,10 @@ def text_impl(self, ctx, run, tag, experiment):
         text = all_text.get(run, {}).get(tag, None)
         if text is None:
             return []
-        return [process_event(d.wall_time, d.step, d.numpy) for d in text]
+        return [
+            process_event(d.wall_time, d.step, d.numpy, enable_markdown)
+            for d in text
+        ]
 
     @wrappers.Request.application
     def text_route(self, request):
@@ -261,7 +277,9 @@ def text_route(self, request):
         experiment = plugin_util.experiment_id(request.environ)
         run = request.args.get("run")
         tag = request.args.get("tag")
-        response = self.text_impl(ctx, run, tag, experiment)
+        markdown_arg = request.args.get("markdown")
+        enable_markdown = markdown_arg != "false"  # Default to enabled.
+        response = self.text_impl(ctx, run, tag, experiment, enable_markdown)
         return http_util.Respond(request, response, "application/json")
 
     def get_plugin_apps(self):
diff --git a/tensorboard/plugins/text/text_plugin_test.py b/tensorboard/plugins/text/text_plugin_test.py
index aeedeb71442..2187f10b681 100644
--- a/tensorboard/plugins/text/text_plugin_test.py
+++ b/tensorboard/plugins/text/text_plugin_test.py
@@ -76,7 +76,8 @@ def generate_testdata(self, logdir=None):
                     writer.add_summary(summ, global_step=step)
                     step += 1
 
-                vector_message = ["one", "two", "three", "four"]
+                # Test unicode superscript 4.
+                vector_message = ["one", "two", "three", "\u2074"]
                 summ = sess.run(
                     vector_summary, feed_dict={placeholder: vector_message}
                 )
@@ -101,22 +102,38 @@ def testIndex(self):
     def testText(self):
         plugin = self.load_plugin()
         fry = plugin.text_impl(
-            context.RequestContext(), "fry", "message", experiment="123"
+            context.RequestContext(),
+            "fry",
+            "message",
+            experiment="123",
+            enable_markdown=True,
         )
         leela = plugin.text_impl(
-            context.RequestContext(), "leela", "message", experiment="123"
+            context.RequestContext(),
+            "leela",
+            "message",
+            experiment="123",
+            enable_markdown=False,
         )
         self.assertEqual(len(fry), 4)
         self.assertEqual(len(leela), 4)
         for i in range(4):
             self.assertEqual(fry[i]["step"], i)
             self.assertEqual(leela[i]["step"], i)
-
-        table = plugin.text_impl(
-            context.RequestContext(), "fry", "vector", experiment="123"
+            self.assertEqual(
+                fry[i]["text"], "<p>fry <em>loves</em> %s</p>" % GEMS[i]
+            )
+            self.assertEqual(leela[i]["text"], "leela *loves* %s" % GEMS[i])
+
+        md_table = plugin.text_impl(
+            context.RequestContext(),
+            "fry",
+            "vector",
+            experiment="123",
+            enable_markdown=True,
         )[0]["text"]
         self.assertEqual(
-            table,
+            md_table,
             textwrap.dedent(
                 """\
                 <table>
@@ -131,7 +148,37 @@ def testText(self):
                 <td><p>three</p></td>
                 </tr>
                 <tr>
-                <td><p>four</p></td>
+                <td><p>\u2074</p></td>
+                </tr>
+                </tbody>
+                </table>
+                """.rstrip()
+            ),
+        )
+        plain_table = plugin.text_impl(
+            context.RequestContext(),
+            "fry",
+            "vector",
+            experiment="123",
+            enable_markdown=False,
+        )[0]["text"]
+        self.assertEqual(
+            plain_table,
+            textwrap.dedent(
+                """\
+                <table>
+                <tbody>
+                <tr>
+                <td>one</td>
+                </tr>
+                <tr>
+                <td>two</td>
+                </tr>
+                <tr>
+                <td>three</td>
+                </tr>
+                <tr>
+                <td>\u2074</td>
                 </tr>
                 </tbody>
                 </table>
@@ -297,7 +344,9 @@ def make_range_array(dim):
             np.testing.assert_array_equal(actual, expected)
 
     def test_text_array_to_html(self):
-        convert = text_plugin.text_array_to_html
+        convert = lambda x: text_plugin.text_array_to_html(
+            x, enable_markdown=True
+        )
         scalar = np.array("foo")
         scalar_expected = "<p>foo</p>"
         self.assertEqual(convert(scalar), scalar_expected)
diff --git a/tensorboard/plugins/text/tf_text_dashboard/BUILD b/tensorboard/plugins/text/tf_text_dashboard/BUILD
index d2199e1c441..37bc28b7321 100644
--- a/tensorboard/plugins/text/tf_text_dashboard/BUILD
+++ b/tensorboard/plugins/text/tf_text_dashboard/BUILD
@@ -22,6 +22,7 @@ tf_ts_library(
         "//tensorboard/components/tf_markdown_view",
         "//tensorboard/components/tf_paginated_view",
         "//tensorboard/components/tf_runs_selector",
+        "//tensorboard/components/tf_storage",
         "@npm//@polymer/decorators",
         "@npm//@polymer/polymer",
         "@npm//@types/d3",
diff --git a/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts b/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts
index 2d25ee5d560..32ccad09d0b 100644
--- a/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts
+++ b/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts
@@ -30,7 +30,7 @@ import '../../../components/tf_dashboard_common/dashboard-style';
 import '../../../components/tf_dashboard_common/tf-dashboard-layout';
 import '../../../components/tf_paginated_view/tf-category-paginated-view';
 import '../../../components/tf_runs_selector/tf-runs-selector';
-
+import * as tf_storage from '../../../components/tf_storage/storage';
 import './tf-text-loader';
 
 @customElement('tf-text-dashboard')
@@ -38,6 +38,13 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
   static readonly template = html`
     <tf-dashboard-layout>
       <div class="sidebar" slot="sidebar">
+        <div class="sidebar-section">
+          <div class="line-item">
+            <paper-checkbox checked="{{_markdownEnabled}}"
+              >Enable Markdown</paper-checkbox
+            >
+          </div>
+        </div>
         <div class="sidebar-section runs-selector">
           <tf-runs-selector selected-runs="{{_selectedRuns}}">
           </tf-runs-selector>
@@ -90,6 +97,7 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
                   tag="[[item.tag]]"
                   run="[[item.run]]"
                   request-manager="[[_requestManager]]"
+                  markdown-enabled="[[_markdownEnabled]]"
                 ></tf-text-loader>
               </template>
             </tf-category-paginated-view>
@@ -109,6 +117,18 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
   @property({type: Boolean})
   reloadOnReady: boolean = true;
 
+  @property({
+    type: Boolean,
+    notify: true,
+    observer: '_markdownEnabledStorageObserver',
+  })
+  _markdownEnabled: boolean = tf_storage
+    .getBooleanInitializer('_markdownEnabled', {
+      defaultValue: true,
+      useLocalStorage: true,
+    })
+    .call(this);
+
   @property({type: Array})
   _selectedRuns: string[];
 
@@ -130,6 +150,10 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
   @property({type: Object})
   _requestManager = new RequestManager();
 
+  static get observers() {
+    return ['_markdownEnabledObserver(_markdownEnabled)'];
+  }
+
   ready() {
     super.ready();
     if (this.reloadOnReady) this.reload();
@@ -174,4 +198,16 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
     var tagFilter = this._tagFilter;
     return categorizeRunTagCombinations(runToTag, selectedRuns, tagFilter);
   }
+
+  _markdownEnabledStorageObserver = tf_storage.getBooleanObserver(
+    '_markdownEnabled',
+    {
+      defaultValue: true,
+      useLocalStorage: true,
+    }
+  );
+
+  _markdownEnabledObserver() {
+    this._reloadTexts();
+  }
 }
diff --git a/tensorboard/plugins/text/tf_text_dashboard/tf-text-loader.ts b/tensorboard/plugins/text/tf_text_dashboard/tf-text-loader.ts
index 95bca787682..c521d445efa 100644
--- a/tensorboard/plugins/text/tf_text_dashboard/tf-text-loader.ts
+++ b/tensorboard/plugins/text/tf_text_dashboard/tf-text-loader.ts
@@ -104,6 +104,9 @@ class TfTextLoader extends LegacyElementMixin(PolymerElement) {
   @property({type: String})
   tag: string;
 
+  @property({type: Boolean})
+  markdownEnabled: boolean;
+
   // Ordered from newest to oldest.
   @property({type: Array})
   _texts: Array<{wall_time: Date; step: number; text: string}> = [];
@@ -141,6 +144,7 @@ class TfTextLoader extends LegacyElementMixin(PolymerElement) {
     const url = addParams(router.pluginRoute('text', '/text'), {
       tag: this.tag,
       run: this.run,
+      markdown: this.markdownEnabled ? 'true' : 'false',
     });
     const updateTexts = this._canceller.cancellable((result) => {
       if (result.cancelled) {