-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
html.go
98 lines (87 loc) · 1.95 KB
/
html.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/*
Copyright © 2014–2020 Thomas Michael Edwards. All rights reserved.
Use of this source code is governed by a Simplified BSD License which
can be found in the LICENSE file.
*/
package main
import (
// standard packages
"bytes"
"regexp"
// external packages
"golang.org/x/net/html"
)
// getDocumentTree parses source as an HTML document and returns the root
// node of the resulting tree, together with any error reported by
// html.Parse.
func getDocumentTree(source []byte) (*html.Node, error) {
	reader := bytes.NewReader(source)
	return html.Parse(reader)
}
// getElementByID searches the tree rooted at node for an element whose "id"
// attribute value matches the regular expression idPat, placing no
// restriction on the tag name.  It delegates to getElementByIDAndTag with an
// empty tag and returns whatever that returns (nil when nothing matches).
func getElementByID(node *html.Node, idPat string) *html.Node {
	const anyTag = "" // an empty tag disables tag filtering
	return getElementByIDAndTag(node, idPat, anyTag)
}
// getElementByTag searches the tree rooted at node for an element whose tag
// name equals tag, placing no restriction on its "id" attribute.  It
// delegates to getElementByIDAndTag with an empty ID pattern and returns
// whatever that returns (nil when nothing matches).
func getElementByTag(node *html.Node, tag string) *html.Node {
	const anyID = "" // an empty pattern disables ID filtering
	return getElementByIDAndTag(node, anyID, tag)
}
// getElementByIDAndTag performs a depth-first, document-order search of the
// tree rooted at node and returns the first element node that satisfies both
// filters.  It returns nil when node is nil or when no element matches.
//
// idPat is a regular expression tested against the value of the element's
// "id" attribute; an empty idPat accepts every element, including elements
// without an "id" attribute.  tag is compared for equality with the
// element's Data field; an empty tag accepts every element.
//
// The pattern is compiled exactly once per call instead of once per visited
// element.  An invalid, non-empty idPat panics (regexp.MustCompile) before
// the search starts.
func getElementByIDAndTag(node *html.Node, idPat, tag string) *html.Node {
	if node == nil {
		return nil
	}

	// A nil idRe means "no ID filtering".
	var idRe *regexp.Regexp
	if idPat != "" {
		idRe = regexp.MustCompile(idPat)
	}

	var search func(n *html.Node) *html.Node
	search = func(n *html.Node) *html.Node {
		if n.Type == html.ElementNode && (tag == "" || tag == n.Data) {
			if idRe == nil {
				return n
			}
			for _, attr := range n.Attr {
				if attr.Key == "id" && idRe.MatchString(attr.Val) {
					return n
				}
			}
		}

		// No match here; try each child subtree in order.
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			if found := search(child); found != nil {
				return found
			}
		}
		return nil
	}

	return search(node)
}
// hasAttr reports whether node carries an attribute whose key is exactly
// attrName.
func hasAttr(node *html.Node, attrName string) bool {
	for i := range node.Attr {
		if node.Attr[i].Key == attrName {
			return true
		}
	}
	return false
}
// hasAttrRe reports whether node carries at least one attribute whose key
// matches the regular expression attrRe.
func hasAttrRe(node *html.Node, attrRe *regexp.Regexp) bool {
	matched := false
	// Stop at the first matching key.
	for i := 0; i < len(node.Attr) && !matched; i++ {
		matched = attrRe.MatchString(node.Attr[i].Key)
	}
	return matched
}
// getAttr returns the first attribute of node whose key is exactly attrName,
// or nil if node has no such attribute.
//
// The returned pointer refers to a copy of the attribute, not to the entry
// inside node.Attr, so writing through it does not modify the node.
func getAttr(node *html.Node, attrName string) *html.Attribute {
	for i := range node.Attr {
		if node.Attr[i].Key != attrName {
			continue
		}
		found := node.Attr[i] // copy, so the node's own slice stays untouched
		return &found
	}
	return nil
}
// getAttrRe returns the first attribute of node whose key matches the
// regular expression attrRe, or nil if no attribute key matches.
//
// The returned pointer refers to a copy of the attribute, not to the entry
// inside node.Attr, so writing through it does not modify the node.
func getAttrRe(node *html.Node, attrRe *regexp.Regexp) *html.Attribute {
	for i := range node.Attr {
		if !attrRe.MatchString(node.Attr[i].Key) {
			continue
		}
		found := node.Attr[i] // copy, so the node's own slice stays untouched
		return &found
	}
	return nil
}