-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
html: add Node.{Ancestors,ChildNodes,Descendants}()
Adds iterators over the ancestors, immediate children, and all descendants of a Node, respectively.

Fixes golang/go#62113

Change-Id: Iab015872cc3a20fe5e7cae3bc90b89cba68cc3f8
GitHub-Last-Rev: d99de58
GitHub-Pull-Request: #215
Reviewed-on: https://go-review.googlesource.com/c/net/+/594195
Reviewed-by: Ian Lance Taylor <iant@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
- Loading branch information
1 parent
4783315
commit 511cc3a
Showing
5 changed files
with
163 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
//go:build go1.23 | ||
|
||
package html | ||
|
||
import "iter" | ||
|
||
// Ancestors returns an iterator over the ancestors of n, starting with n.Parent. | ||
// | ||
// Mutating a Node or its parents while iterating may have unexpected results. | ||
func (n *Node) Ancestors() iter.Seq[*Node] { | ||
_ = n.Parent // eager nil check | ||
|
||
return func(yield func(*Node) bool) { | ||
for p := n.Parent; p != nil && yield(p); p = p.Parent { | ||
} | ||
} | ||
} | ||
|
||
// ChildNodes returns an iterator over the immediate children of n, | ||
// starting with n.FirstChild. | ||
// | ||
// Mutating a Node or its children while iterating may have unexpected results. | ||
func (n *Node) ChildNodes() iter.Seq[*Node] { | ||
_ = n.FirstChild // eager nil check | ||
|
||
return func(yield func(*Node) bool) { | ||
for c := n.FirstChild; c != nil && yield(c); c = c.NextSibling { | ||
} | ||
} | ||
|
||
} | ||
|
||
// Descendants returns an iterator over all nodes recursively beneath | ||
// n, excluding n itself. Nodes are visited in depth-first preorder. | ||
// | ||
// Mutating a Node or its descendants while iterating may have unexpected results. | ||
func (n *Node) Descendants() iter.Seq[*Node] { | ||
_ = n.FirstChild // eager nil check | ||
|
||
return func(yield func(*Node) bool) { | ||
n.descendants(yield) | ||
} | ||
} | ||
|
||
func (n *Node) descendants(yield func(*Node) bool) bool { | ||
for c := range n.ChildNodes() { | ||
if !yield(c) || !c.descendants(yield) { | ||
return false | ||
} | ||
} | ||
return true | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
//go:build go1.23 | ||
|
||
package html | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
) | ||
|
||
func TestNode_ChildNodes(t *testing.T) { | ||
tests := []struct { | ||
in string | ||
want string | ||
}{ | ||
{"", ""}, | ||
{"<a></a>", "a"}, | ||
{"a", "a"}, | ||
{"<a></a><!--b-->", "a b"}, | ||
{"a<b></b>c", "a b c"}, | ||
{"a<b><!--c--></b>d", "a b d"}, | ||
{"<a><b>c<!--d-->e</b></a>f<!--g--><h>i</h>", "a f g h"}, | ||
} | ||
for _, test := range tests { | ||
doc, err := Parse(strings.NewReader(test.in)) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
// Drill to <html><head></head><body> | ||
n := doc.FirstChild.FirstChild.NextSibling | ||
var results []string | ||
for c := range n.ChildNodes() { | ||
results = append(results, c.Data) | ||
} | ||
if got := strings.Join(results, " "); got != test.want { | ||
t.Errorf("ChildNodes = %q, want %q", got, test.want) | ||
} | ||
} | ||
} | ||
|
||
func TestNode_Descendants(t *testing.T) { | ||
tests := []struct { | ||
in string | ||
want string | ||
}{ | ||
{"", ""}, | ||
{"<a></a>", "a"}, | ||
{"<a><b></b></a>", "a b"}, | ||
{"<a>b</a>", "a b"}, | ||
{"<a><!--b--></a>", "a b"}, | ||
{"<a>b<c></c>d</a>", "a b c d"}, | ||
{"<a>b<c><!--d--></c>e</a>", "a b c d e"}, | ||
{"<a><b><c>d<!--e-->f</c></b>g<!--h--><i>j</i></a>", "a b c d e f g h i j"}, | ||
} | ||
for _, test := range tests { | ||
doc, err := Parse(strings.NewReader(test.in)) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
// Drill to <html><head></head><body> | ||
n := doc.FirstChild.FirstChild.NextSibling | ||
var results []string | ||
for c := range n.Descendants() { | ||
results = append(results, c.Data) | ||
} | ||
if got := strings.Join(results, " "); got != test.want { | ||
t.Errorf("Descendants = %q; want: %q", got, test.want) | ||
} | ||
} | ||
} | ||
|
||
func TestNode_Ancestors(t *testing.T) { | ||
for _, size := range []int{0, 1, 2, 10, 100, 10_000} { | ||
n := buildChain(size) | ||
nParents := 0 | ||
for _ = range n.Ancestors() { | ||
nParents++ | ||
} | ||
if nParents != size { | ||
t.Errorf("number of Ancestors = %d; want: %d", nParents, size) | ||
} | ||
} | ||
} | ||
|
||
func buildChain(size int) *Node { | ||
child := new(Node) | ||
for range size { | ||
parent := child | ||
child = new(Node) | ||
parent.AppendChild(child) | ||
} | ||
return child | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters