-
Notifications
You must be signed in to change notification settings - Fork 240
/
main.go
88 lines (72 loc) · 2.64 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// Command logic is a chromedp example demonstrating more complex logic beyond
// simple actions.
package main
import (
"context"
"fmt"
"log"
"strings"
"time"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/chromedp"
)
// main creates a chromedp context, lists the awesome-go projects found under
// the "Selenium and browser control tools." section, and logs each project's
// name, URL, and description.
func main() {
	// create context
	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	// list awesome go projects for the "Selenium and browser control tools."
	res, err := listAwesomeGoProjects(ctx, "Selenium and browser control tools.")
	if err != nil {
		// log.Fatalf terminates the process via os.Exit, which does not run
		// deferred functions; release the chromedp context explicitly first.
		cancel()
		log.Fatalf("could not list awesome go projects: %v", err)
	}

	// output the values (map iteration order is not deterministic)
	for k, v := range res {
		log.Printf("project %s (%s): '%s'", k, v.URL, v.Description)
	}
}
// projectDesc holds the link target and the descriptive text collected for a
// single project entry.
type projectDesc struct {
	// URL is the project's link target.
	URL string
	// Description is the text that accompanies the project's link.
	Description string
}
// listAwesomeGoProjects is the highest level logic for browsing to the
// awesome-go page, finding the specified section sect, and retrieving the
// associated projects from the page.
//
// ctx must be a chromedp context; the whole operation is bounded by a
// 15 second timeout derived from it. sect is the text contained in the <p>
// element that introduces the section's list.
//
// On success it returns a map keyed by project link text, each value holding
// the link's href and its description. On failure it returns a nil map and an
// error that wraps the underlying cause, so callers can inspect it with
// errors.Is / errors.As (for example against context.DeadlineExceeded).
func listAwesomeGoProjects(ctx context.Context, sect string) (map[string]projectDesc, error) {
	// force max timeout of 15 seconds for retrieving and processing the data
	ctx, cancel := context.WithTimeout(ctx, 15*time.Second)
	defer cancel()

	// Quote sect as an XPath literal so that section names containing quote
	// characters still produce a valid expression.
	sel := fmt.Sprintf(`//p[text()[contains(., %s)]]`, xpathLiteral(sect))

	// navigate
	if err := chromedp.Run(ctx, chromedp.Navigate(`https://github.com/avelino/awesome-go`)); err != nil {
		return nil, fmt.Errorf("could not navigate to github: %w", err)
	}

	// wait visible
	if err := chromedp.Run(ctx, chromedp.WaitVisible(sel)); err != nil {
		return nil, fmt.Errorf("could not get section: %w", err)
	}

	// list items of the <ul> that follows the section paragraph
	sib := sel + `/following-sibling::ul/li`

	// get project link text
	var projects []*cdp.Node
	if err := chromedp.Run(ctx, chromedp.Nodes(sib+`/child::a/text()`, &projects)); err != nil {
		return nil, fmt.Errorf("could not get projects: %w", err)
	}

	// get links and description text
	var linksAndDescriptions []*cdp.Node
	if err := chromedp.Run(ctx, chromedp.Nodes(sib+`/child::node()`, &linksAndDescriptions)); err != nil {
		return nil, fmt.Errorf("could not get links and descriptions: %w", err)
	}

	// check length: the indexing below expects exactly two child nodes (the
	// link followed by the description text) per project
	if 2*len(projects) != len(linksAndDescriptions) {
		return nil, fmt.Errorf("projects and links and descriptions lengths do not match (2*%d != %d)", len(projects), len(linksAndDescriptions))
	}

	// process data
	res := make(map[string]projectDesc, len(projects))
	for i, p := range projects {
		link, desc := linksAndDescriptions[2*i], linksAndDescriptions[2*i+1]
		res[p.NodeValue] = projectDesc{
			URL: link.AttributeValue("href"),
			// strip surrounding whitespace and the leading "- " separator
			Description: strings.TrimPrefix(strings.TrimSpace(desc.NodeValue), "- "),
		}
	}
	return res, nil
}

// xpathLiteral returns s quoted as an XPath 1.0 string literal. XPath 1.0 has
// no escape sequences, so a string containing both quote characters is built
// with concat() from single-quoted pieces joined by a double-quoted "'".
func xpathLiteral(s string) string {
	if !strings.Contains(s, "'") {
		return "'" + s + "'"
	}
	if !strings.Contains(s, `"`) {
		return `"` + s + `"`
	}
	return "concat('" + strings.ReplaceAll(s, "'", `', "'", '`) + "')"
}