Skip to content

Commit

Permalink
html: only render content literally in the HTML namespace
Browse files Browse the repository at this point in the history
Per the WHATWG HTML specification, section 13.3, only append the literal
content of a text node if we are in the HTML namespace.

Thanks to Mohammad Thoriq Aziz for reporting this issue.

Fixes golang/go#61615
Fixes CVE-2023-3978

Change-Id: I332152904d4e7646bd2441602bcbe591fc655fa4
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1942896
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
Reviewed-on: https://go-review.googlesource.com/c/net/+/514896
Reviewed-by: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
  • Loading branch information
rolandshoemaker authored and neild committed Aug 1, 2023
1 parent 63fe334 commit 8ffa475
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 14 deletions.
28 changes: 24 additions & 4 deletions html/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,8 @@ func render1(w writer, n *Node) error {
}
}

// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
// Render any child nodes
if childTextNodesAreLiteral(n) {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
Expand All @@ -213,7 +212,7 @@ func render1(w writer, n *Node) error {
// last element in the file, with no closing tag.
return plaintextAbort
}
default:
} else {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
Expand All @@ -231,6 +230,27 @@ func render1(w writer, n *Node) error {
return w.WriteByte('>')
}

func childTextNodesAreLiteral(n *Node) bool {
// Per WHATWG HTML 13.3, if the parent of the current node is a style,
// script, xmp, iframe, noembed, noframes, or plaintext element, and the
// current node is a text node, append the value of the node's data
// literally. The specification is not explicit about it, but we only
// enforce this if we are in the HTML namespace (i.e. when the namespace is
// "").
// NOTE: we also always include noscript elements, although the
// specification states that they should only be rendered as such if
// scripting is enabled for the node (which is not something we track).
if n.Namespace != "" {
return false
}
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
return true
default:
return false
}
}

// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.
Expand Down
56 changes: 46 additions & 10 deletions html/render_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ package html

import (
"bytes"
"fmt"
"strings"
"testing"
)

Expand Down Expand Up @@ -108,16 +110,16 @@ func TestRenderer(t *testing.T) {
// just commentary. The "0:" prefixes are for easy cross-reference with
// the nodes array.
treeAsText := [...]string{
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
10: `. . "5"`,
11: `. . <blockquote>`,
12: `. . <br>`,
Expand Down Expand Up @@ -169,3 +171,37 @@ func TestRenderer(t *testing.T) {
t.Errorf("got vs want:\n%s\n%s\n", got, want)
}
}

func TestRenderTextNodes(t *testing.T) {
elements := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
for _, namespace := range []string{
"", // html
"svg",
"math",
} {
for _, e := range elements {
var namespaceOpen, namespaceClose string
if namespace != "" {
namespaceOpen, namespaceClose = fmt.Sprintf("<%s>", namespace), fmt.Sprintf("</%s>", namespace)
}
doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, namespaceOpen, e, e, namespaceClose)
n, err := Parse(strings.NewReader(doc))
if err != nil {
t.Fatal(err)
}
b := bytes.NewBuffer(nil)
if err := Render(b, n); err != nil {
t.Fatal(err)
}

expected := doc
if namespace != "" {
expected = strings.Replace(expected, "&", "&amp;", 1)
}

if b.String() != expected {
t.Errorf("unexpected output: got %q, want %q", b.String(), expected)
}
}
}
}

0 comments on commit 8ffa475

Please sign in to comment.