From 39940adcaaa73e661124cb80fb8dd57ea929dbaf Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Thu, 9 Feb 2023 10:57:03 +1100 Subject: [PATCH] html: parse comments per HTML spec Updates golang/go#58246 Change-Id: Iaba5ed65f5d244fd47372ef0c08fc4cdb5ed90f9 Reviewed-on: https://go-review.googlesource.com/c/net/+/466776 TryBot-Result: Gopher Robot Auto-Submit: Nigel Tao Reviewed-by: Damien Neil Run-TryBot: Nigel Tao Reviewed-by: Nigel Tao (INACTIVE; USE @golang.org INSTEAD) --- html/comment_test.go | 270 +++++++++++++++++++++++++++++++++++++++++++ html/token.go | 49 ++++++-- html/token_test.go | 37 +++++- 3 files changed, 347 insertions(+), 9 deletions(-) create mode 100644 html/comment_test.go diff --git a/html/comment_test.go b/html/comment_test.go new file mode 100644 index 0000000000..2c80bc748c --- /dev/null +++ b/html/comment_test.go @@ -0,0 +1,270 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "bytes" + "testing" +) + +// TestComments exhaustively tests every 'interesting' N-byte string is +// correctly parsed as a comment. N ranges from 4+1 to 4+suffixLen inclusive, +// where 4 is the length of the "") return + } else if c == '-' { + dashCount = 1 + beginning = false + continue } } } @@ -645,6 +649,35 @@ func (z *Tokenizer) readComment() { } } +func (z *Tokenizer) calculateAbruptCommentDataEnd() int { + raw := z.Raw() + const prefixLen = len("", }, - // Comments. + // Comments. See also func TestComments. { "comment0", "abcdef", @@ -376,6 +376,41 @@ var tokenTests = []tokenTest{ "az", "a$$z", }, + { + "comment16", + "az", + "a$$z", + }, + { + "comment17", + "a", + }, + { + "comment18", + "az", + "a$$z", + }, + { + "comment19", + "a", + }, + { + "comment20", + "az", + "a$$z", + }, + { + "comment21", + "az", + "a$$z", + }, + { + "comment22", + "az", + "a$$z", + }, // An attribute with a backslash. { "backslash",