From 243c8356e7fde9525545a7cf9921705bda04669f Mon Sep 17 00:00:00 2001 From: Dmitry Verkhoturov Date: Thu, 21 Jul 2022 22:58:24 +0200 Subject: [PATCH] do not sanitise the original comment markdown Previously it was sanitised using the HTML sanitiser, but it had proven troublesome and unnecessary. Remark42 rendered the markdown into proper HTML, but then some pieces of it (like cited HTML code inside the code block, marked by backticks) were cut out, which then showed the incorrect markdown to a user when they were editing the comment. For example, the comment "`foo`" became "foo" after sanitising, and despite the proper render user saw only "foo" when editing the comment. After this change, the initial comment markdown is preserved unaltered. It could contain dangerous HTML with JS, which I assume shouldn't be a problem as it's never rendered as HTML but instead supposed to be converted to HTML by the interpreter. In Remark42, it's stored in a comment.Text field and sanitised and thus safe. I've left information about the potential danger of rendering the original markdown as-is without an interpreter in all relevant places I could find. --- backend/app/rest/api/rest_private_test.go | 1 - backend/app/store/comment.go | 7 ++++--- site/src/docs/contributing/api/index.md | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/backend/app/rest/api/rest_private_test.go b/backend/app/rest/api/rest_private_test.go index f556efa54b..d3a6eecd8e 100644 --- a/backend/app/rest/api/rest_private_test.go +++ b/backend/app/rest/api/rest_private_test.go @@ -56,7 +56,6 @@ func TestRest_Create(t *testing.T) { // based on issue https://github.com/umputun/remark42/issues/1292 func TestRest_CreateFilteredCode(t *testing.T) { - t.Skip("not yet fixed") ts, _, teardown := startupT(t) defer teardown() diff --git a/backend/app/store/comment.go b/backend/app/store/comment.go index a04f545b89..d4726b6420 100644 --- a/backend/app/store/comment.go +++ b/backend/app/store/comment.go @@ -15,7 +15,7 @@ type Comment struct { ID string `json:"id" bson:"_id"` ParentID string `json:"pid"` Text string `json:"text"` - Orig string `json:"orig,omitempty"` + Orig string `json:"orig,omitempty"` // important: never render this as HTML! It's not sanitized. User User `json:"user"` Locator Locator `json:"locator"` Score int `json:"score"` @@ -112,7 +112,9 @@ func (c *Comment) SetDeleted(mode DeleteMode) { } } -// Sanitize clean dangerous html/js from the comment +// Sanitize clean dangerous html/js from the comment. +// Comment.Orig which is used to store the original comment text is not sanitized +// as we expect to never render it as HTML and render Comment.Text instead func (c *Comment) Sanitize() { p := bluemonday.UGCPolicy() p.AllowAttrs("class").Matching(regexp.MustCompile("^chroma$")).OnElements("pre") @@ -125,7 +127,6 @@ func (c *Comment) Sanitize() { p.AllowAttrs("class").Matching(regexp.MustCompile(codeSpanClassRegex)).OnElements("span") p.AllowAttrs("loading").Matching(regexp.MustCompile("^(lazy|eager)$")).OnElements("img") c.Text = p.Sanitize(c.Text) - c.Orig = p.Sanitize(c.Orig) c.User.ID = template.HTMLEscapeString(c.User.ID) c.User.Name = c.SanitizeText(c.User.Name) c.User.Picture = c.SanitizeAsURL(c.User.Picture) diff --git a/site/src/docs/contributing/api/index.md b/site/src/docs/contributing/api/index.md index bcca2fce74..975282d37f 100644 --- a/site/src/docs/contributing/api/index.md +++ b/site/src/docs/contributing/api/index.md @@ -28,7 +28,7 @@ type Comment struct { ID string `json:"id"` // comment ID, read only ParentID string `json:"pid"` // parent ID Text string `json:"text"` // comment text, after md processing - Orig string `json:"orig"` // original comment text + Orig string `json:"orig"` // original comment text in Markdown, should never be rendered as HTML as-is! User User `json:"user"` // user info, read only Locator Locator `json:"locator"` // post locator Score int `json:"score"` // comment score, read only @@ -83,7 +83,9 @@ type EditRequest struct { - `GET /api/v1/last/{max}?site=site-id&since=ts-msec` - get up to `{max}` last comments, `since` (epoch time, milliseconds) is optional - `GET /api/v1/id/{id}?site=site-id` - get comment by `comment id` -- `GET /api/v1/comments?site=site-id&user=id&limit=N` - get comment by `user id`, returns `response` object +- `GET /api/v1/comments?site=site-id&user=id&limit=N` - get comment by `user id`, returns `response` object. + +**Important**: original comment text in Markdown in the `orig` field should never be rendered as HTML as-is, only `text` containing HTML is sanitized and safe for render. ```go type response struct {