Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

text: add checkbox to control server-side markdown conversion #5378

Merged
merged 6 commits into from
Oct 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions tensorboard/plugin_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,22 @@ def __init__(self):
_CLEANER_STORE = _CleanerStore()


def safe_html(unsafe_string):
"""Return the input as a str, sanitized for insertion into the DOM.

Arguments:
unsafe_string: A Unicode string or UTF-8--encoded bytestring
possibly containing unsafe HTML markup.

Returns:
A string containing safe HTML.
"""
total_null_bytes = 0
if isinstance(unsafe_string, bytes):
unsafe_string = unsafe_string.decode("utf-8")
return _CLEANER_STORE.cleaner.clean(unsafe_string)


def markdown_to_safe_html(markdown_string):
"""Convert Markdown to HTML that's safe to splice into the DOM.

Expand Down
52 changes: 52 additions & 0 deletions tensorboard/plugin_util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,58 @@
from tensorboard.backend import experiment_id


class SafeHTMLTest(tb_test.TestCase):
def test_empty_input(self):
self.assertEqual(plugin_util.safe_html(""), "")

def test_whitelisted_tags_and_attributes_allowed(self):
s = (
'Check out <a href="http://example.com" title="do it">'
"my website</a>!"
)
self.assertEqual(plugin_util.safe_html(s), "%s" % s)

def test_arbitrary_tags_and_attributes_removed(self):
self.assertEqual(
plugin_util.safe_html(
"We should bring back the <blink>blink tag</blink>; "
'<a name="bookmark" href="http://please-dont.com">'
"sign the petition!</a>"
),
"We should bring back the "
"&lt;blink&gt;blink tag&lt;/blink&gt;; "
'<a href="http://please-dont.com">sign the petition!</a>',
)

def test_javascript_hrefs_sanitized(self):
self.assertEqual(
plugin_util.safe_html(
'A <a href="javascript:void0">sketchy link</a> for you'
),
"A <a>sketchy link</a> for you",
)

def test_byte_strings_interpreted_as_utf8(self):
s = "Look\u2014some UTF-8!".encode("utf-8")
assert isinstance(s, bytes), (type(s), bytes)
self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!")

def test_unicode_strings_passed_through(self):
s = "Look\u2014some UTF-8!"
assert not isinstance(s, bytes), (type(s), bytes)
self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!")

def test_null_bytes_stripped(self):
# If this function is mistakenly called with UTF-16 or UTF-32 encoded text,
# there will probably be a bunch of null bytes. Ensure these are stripped.
s = "un_der_score".encode("utf-32-le")
# UTF-32 encoding of ASCII will have 3 null bytes per char. 36 = 3 * 12.
self.assertEqual(
plugin_util.safe_html(s),
"un_der_score",
)


class MarkdownToSafeHTMLTest(tb_test.TestCase):
def _test(self, markdown_string, expected):
actual = plugin_util.markdown_to_safe_html(markdown_string)
Expand Down
42 changes: 30 additions & 12 deletions tensorboard/plugins/text/text_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def reduce_to_2d(arr):
return arr[slices]


def text_array_to_html(text_arr):
def text_array_to_html(text_arr, enable_markdown):
"""Take a numpy.ndarray containing strings, and convert it into html.

If the ndarray contains a single scalar string, that string is converted to
Expand All @@ -164,29 +164,42 @@ def text_array_to_html(text_arr):

Args:
text_arr: A numpy.ndarray containing strings.
enable_markdown: boolean, whether to enable Markdown

Returns:
The array converted to html.
"""
if not text_arr.shape:
# It is a scalar. No need to put it in a table, just apply markdown
return plugin_util.markdown_to_safe_html(text_arr.item())
# It is a scalar. No need to put it in a table.
if enable_markdown:
return plugin_util.markdown_to_safe_html(text_arr.item())
else:
return plugin_util.safe_html(text_arr.item())
warning = ""
if len(text_arr.shape) > 2:
warning = plugin_util.markdown_to_safe_html(
WARNING_TEMPLATE % len(text_arr.shape)
)
text_arr = reduce_to_2d(text_arr)
table = plugin_util.markdowns_to_safe_html(
text_arr.reshape(-1),
lambda xs: make_table(np.array(xs).reshape(text_arr.shape)),
)
if enable_markdown:
table = plugin_util.markdowns_to_safe_html(
text_arr.reshape(-1),
lambda xs: make_table(np.array(xs).reshape(text_arr.shape)),
)
else:
# Convert utf-8 bytes to str. The built-in np.char.decode doesn't work on
# object arrays, and converting to an numpy chararray is lossy.
decode = lambda bs: bs.decode("utf-8") if isinstance(bs, bytes) else bs
text_arr_str = np.array(
[decode(bs) for bs in text_arr.reshape(-1)]
).reshape(text_arr.shape)
Comment on lines +192 to +195
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do note that the markdown version makes sure it accounts for null characters resulting from decoding mismatch.

In tensorboard/plugin_util.py.

            source_decoded = source.decode("utf-8")
            # Remove null bytes and warn if there were any, since it probably means
            # we were given a bad encoding.
            source = source_decoded.replace("\x00", "")
            total_null_bytes += len(source_decoded) - len(source)
...

    warning = ""
    if total_null_bytes:
        warning = (
            "<!-- WARNING: discarded %d null bytes in markdown string "
            "after UTF-8 decoding -->\n"
        ) % total_null_bytes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I considered adding that to safe_html but wasn't sure it was warranted since the sanitizer strips the null bytes anyway (confirmed with a unit test). The reason given in the markdown version for doing the stripping is that otherwise, the markdown conversion gets confused by the null bytes:

def test_null_bytes_stripped_before_markdown_processing(self):
# If this function is mistakenly called with UTF-16 or UTF-32 encoded text,
# there will probably be a bunch of null bytes. These would be stripped by
# the sanitizer no matter what, but make sure we remove them before markdown
# interpretation to avoid affecting output (e.g. middle-word underscores
# would generate erroneous <em> tags like "un<em>der</em>score") and add an
# HTML comment with a warning.

So the only difference really is whether we insert the diagnostic HTML comment or not. That didn't quite seem worth any potential overhead to me, but I'm happy to add it back if you prefer?

table = plugin_util.safe_html(make_table(text_arr_str))
return warning + table


def process_event(wall_time, step, string_ndarray):
def process_event(wall_time, step, string_ndarray, enable_markdown):
"""Convert a text event into a JSON-compatible response."""
html = text_array_to_html(string_ndarray)
html = text_array_to_html(string_ndarray, enable_markdown)
return {
"wall_time": wall_time,
"step": step,
Expand Down Expand Up @@ -242,7 +255,7 @@ def tags_route(self, request):
index = self.index_impl(ctx, experiment)
return http_util.Respond(request, index, "application/json")

def text_impl(self, ctx, run, tag, experiment):
def text_impl(self, ctx, run, tag, experiment, enable_markdown):
all_text = self._data_provider.read_tensors(
ctx,
experiment_id=experiment,
Expand All @@ -253,15 +266,20 @@ def text_impl(self, ctx, run, tag, experiment):
text = all_text.get(run, {}).get(tag, None)
if text is None:
return []
return [process_event(d.wall_time, d.step, d.numpy) for d in text]
return [
process_event(d.wall_time, d.step, d.numpy, enable_markdown)
for d in text
]

@wrappers.Request.application
def text_route(self, request):
ctx = plugin_util.context(request.environ)
experiment = plugin_util.experiment_id(request.environ)
run = request.args.get("run")
tag = request.args.get("tag")
response = self.text_impl(ctx, run, tag, experiment)
markdown_arg = request.args.get("markdown")
enable_markdown = markdown_arg != "false" # Default to enabled.
response = self.text_impl(ctx, run, tag, experiment, enable_markdown)
return http_util.Respond(request, response, "application/json")

def get_plugin_apps(self):
Expand Down
67 changes: 58 additions & 9 deletions tensorboard/plugins/text/text_plugin_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def generate_testdata(self, logdir=None):
writer.add_summary(summ, global_step=step)
step += 1

vector_message = ["one", "two", "three", "four"]
# Test unicode superscript 4.
vector_message = ["one", "two", "three", "\u2074"]
summ = sess.run(
vector_summary, feed_dict={placeholder: vector_message}
)
Expand All @@ -101,22 +102,38 @@ def testIndex(self):
def testText(self):
plugin = self.load_plugin()
fry = plugin.text_impl(
context.RequestContext(), "fry", "message", experiment="123"
context.RequestContext(),
"fry",
"message",
experiment="123",
enable_markdown=True,
)
leela = plugin.text_impl(
context.RequestContext(), "leela", "message", experiment="123"
context.RequestContext(),
"leela",
"message",
experiment="123",
enable_markdown=False,
)
self.assertEqual(len(fry), 4)
self.assertEqual(len(leela), 4)
for i in range(4):
self.assertEqual(fry[i]["step"], i)
self.assertEqual(leela[i]["step"], i)

table = plugin.text_impl(
context.RequestContext(), "fry", "vector", experiment="123"
self.assertEqual(
fry[i]["text"], "<p>fry <em>loves</em> %s</p>" % GEMS[i]
)
self.assertEqual(leela[i]["text"], "leela *loves* %s" % GEMS[i])

md_table = plugin.text_impl(
context.RequestContext(),
"fry",
"vector",
experiment="123",
enable_markdown=True,
)[0]["text"]
self.assertEqual(
table,
md_table,
textwrap.dedent(
"""\
<table>
Expand All @@ -131,7 +148,37 @@ def testText(self):
<td><p>three</p></td>
</tr>
<tr>
<td><p>four</p></td>
<td><p>\u2074</p></td>
</tr>
</tbody>
</table>
""".rstrip()
),
)
plain_table = plugin.text_impl(
context.RequestContext(),
"fry",
"vector",
experiment="123",
enable_markdown=False,
)[0]["text"]
self.assertEqual(
plain_table,
textwrap.dedent(
"""\
<table>
<tbody>
<tr>
<td>one</td>
</tr>
<tr>
<td>two</td>
</tr>
<tr>
<td>three</td>
</tr>
<tr>
<td>\u2074</td>
</tr>
</tbody>
</table>
Expand Down Expand Up @@ -297,7 +344,9 @@ def make_range_array(dim):
np.testing.assert_array_equal(actual, expected)

def test_text_array_to_html(self):
convert = text_plugin.text_array_to_html
convert = lambda x: text_plugin.text_array_to_html(
x, enable_markdown=True
)
scalar = np.array("foo")
scalar_expected = "<p>foo</p>"
self.assertEqual(convert(scalar), scalar_expected)
Expand Down
1 change: 1 addition & 0 deletions tensorboard/plugins/text/tf_text_dashboard/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tf_ts_library(
"//tensorboard/components/tf_markdown_view",
"//tensorboard/components/tf_paginated_view",
"//tensorboard/components/tf_runs_selector",
"//tensorboard/components/tf_storage",
"@npm//@polymer/decorators",
"@npm//@polymer/polymer",
"@npm//@types/d3",
Expand Down
38 changes: 37 additions & 1 deletion tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,21 @@ import '../../../components/tf_dashboard_common/dashboard-style';
import '../../../components/tf_dashboard_common/tf-dashboard-layout';
import '../../../components/tf_paginated_view/tf-category-paginated-view';
import '../../../components/tf_runs_selector/tf-runs-selector';

import * as tf_storage from '../../../components/tf_storage/storage';
import './tf-text-loader';

@customElement('tf-text-dashboard')
class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
static readonly template = html`
<tf-dashboard-layout>
<div class="sidebar" slot="sidebar">
<div class="sidebar-section">
<div class="line-item">
<paper-checkbox checked="{{_markdownEnabled}}"
>Enable Markdown</paper-checkbox
>
</div>
</div>
<div class="sidebar-section runs-selector">
<tf-runs-selector selected-runs="{{_selectedRuns}}">
</tf-runs-selector>
Expand Down Expand Up @@ -90,6 +97,7 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
tag="[[item.tag]]"
run="[[item.run]]"
request-manager="[[_requestManager]]"
markdown-enabled="[[_markdownEnabled]]"
></tf-text-loader>
</template>
</tf-category-paginated-view>
Expand All @@ -109,6 +117,18 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
@property({type: Boolean})
reloadOnReady: boolean = true;

@property({
type: Boolean,
notify: true,
observer: '_markdownEnabledStorageObserver',
})
_markdownEnabled: boolean = tf_storage
.getBooleanInitializer('_markdownEnabled', {
defaultValue: true,
useLocalStorage: true,
})
.call(this);

@property({type: Array})
_selectedRuns: string[];

Expand All @@ -130,6 +150,10 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
@property({type: Object})
_requestManager = new RequestManager();

static get observers() {
return ['_markdownEnabledObserver(_markdownEnabled)'];
}

ready() {
super.ready();
if (this.reloadOnReady) this.reload();
Expand Down Expand Up @@ -174,4 +198,16 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) {
var tagFilter = this._tagFilter;
return categorizeRunTagCombinations(runToTag, selectedRuns, tagFilter);
}

_markdownEnabledStorageObserver = tf_storage.getBooleanObserver(
'_markdownEnabled',
{
defaultValue: true,
useLocalStorage: true,
}
);

_markdownEnabledObserver() {
this._reloadTexts();
}
}
4 changes: 4 additions & 0 deletions tensorboard/plugins/text/tf_text_dashboard/tf-text-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ class TfTextLoader extends LegacyElementMixin(PolymerElement) {
@property({type: String})
tag: string;

@property({type: Boolean})
markdownEnabled: boolean;

// Ordered from newest to oldest.
@property({type: Array})
_texts: Array<{wall_time: Date; step: number; text: string}> = [];
Expand Down Expand Up @@ -141,6 +144,7 @@ class TfTextLoader extends LegacyElementMixin(PolymerElement) {
const url = addParams(router.pluginRoute('text', '/text'), {
tag: this.tag,
run: this.run,
markdown: this.markdownEnabled ? 'true' : 'false',
});
const updateTexts = this._canceller.cancellable((result) => {
if (result.cancelled) {
Expand Down