-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathboard.go
141 lines (123 loc) · 3.43 KB
/
board.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package ginside
import (
"context"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/pkg/errors"
)
// Post describes one entry scraped from a dcinside board listing.
type Post struct {
	// ID is the post number as it appears in the listing's number column.
	ID string
	// Subject is the text of the listing's subject column.
	Subject string
	// Title is the text of the post's title link.
	Title string
	// Author is the nickname shown in the listing's writer column.
	Author string
	// Date is the post time read from the listing's date column; it is the
	// zero time when the listing carries no date.
	Date time.Time
	// Hits is the view count; 0 when the listing value is not a number.
	Hits int
	// Votes is the recommendation count; 0 when the listing value is not a
	// number.
	Votes int
	// URL is the link to the post with the "page" and "exception_mode" query
	// parameters removed.
	URL string
}
// BoardMinorPosts fetches page 1 of the dcgall minor board identified by id
// and returns the posts listed on it.
//
// The recommended flag is passed through to boardMinorPath unchanged
// (presumably it selects the recommended-posts listing; confirm against
// boardMinorPath). Requests are made with the receiver's HTTP client and are
// bound to ctx.
func (g *GInside) BoardMinorPosts(ctx context.Context, id string, recommended bool) (posts []Post, err error) {
	const firstPage = 1

	listingPath := boardMinorPath(id, firstPage, recommended)
	return boardPostsWithPath(ctx, g.httpClient, listingPath)
}
// BoardPosts fetches page 1 of the dcgall board identified by id and returns
// the posts listed on it.
//
// The recommended flag is passed through to boardPath unchanged (presumably
// it selects the recommended-posts listing; confirm against boardPath).
// Requests are made with the receiver's HTTP client and are bound to ctx.
func (g *GInside) BoardPosts(ctx context.Context, id string, recommended bool) (posts []Post, err error) {
	const firstPage = 1

	listingPath := boardPath(id, firstPage, recommended)
	return boardPostsWithPath(ctx, g.httpClient, listingPath)
}
// maxBoardForwards caps how many consecutive JavaScript forwards
// boardPostsWithPath follows, so a page that forwards to itself (or a cycle
// of forwarding pages) cannot trigger an unbounded chain of requests.
const maxBoardForwards = 10

// boardPostsWithPath returns the posts listed on the dcgall board page at the
// given path.
//
// The page is fetched with makeRequest and parsed as HTML. When the document
// matches forwardRegex it is treated as a JavaScript forward and the captured
// target is fetched instead; at most maxBoardForwards forwards are followed
// before an error is returned. Rows of ".gall_list tbody tr" are then turned
// into Post values; pinned rows, rows without a numeric post number and rows
// whose link is a "javascript:;" placeholder are skipped.
//
// posts is nil when the page lists no usable rows. A non-nil error is
// returned when the request, the HTML parsing or a row's link handling fails.
func boardPostsWithPath(ctx context.Context, client *http.Client, path string) (posts []Post, err error) {
	return boardPostsFollowingForwards(ctx, client, path, maxBoardForwards)
}

// boardPostsFollowingForwards fetches and parses the board page at path,
// following at most forwardsLeft further JavaScript forwards.
func boardPostsFollowingForwards(ctx context.Context, client *http.Client, path string, forwardsLeft int) ([]Post, error) {
	res, err := makeRequest(ctx, client, path)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close() // nolint: errcheck

	// parse html
	mainDoc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, err
	}
	mainDocHTML, err := mainDoc.Html()
	if err != nil {
		return nil, err
	}

	// follow JavaScript forwarding if necessary
	if parts := forwardRegex.FindStringSubmatch(mainDocHTML); len(parts) >= 2 {
		if forwardsLeft <= 0 {
			return nil, errors.New("too many JavaScript forwards")
		}
		return boardPostsFollowingForwards(ctx, client, parts[1], forwardsLeft-1)
	}

	// Keep posts nil (not an empty slice) when nothing is listed, so callers
	// see the same value as before for an empty board.
	var posts []Post
	for _, node := range mainDoc.Find(".gall_list tbody tr").Nodes {
		post, ok, err := parseBoardEntry(goquery.NewDocumentFromNode(node).Selection)
		if err != nil {
			return nil, err
		}
		if !ok {
			continue
		}
		posts = append(posts, post)
	}
	return posts, nil
}

// parseBoardEntry converts one listing row into a Post. ok is false (with a
// nil error) for rows that are skipped rather than reported.
func parseBoardEntry(entry *goquery.Selection) (Post, bool, error) {
	subjectCell := entry.Find("td.gall_subject")
	subjectHTML, err := subjectCell.Html()
	if err != nil {
		return Post{}, false, err
	}
	// skip pinned
	if strings.Contains(subjectHTML, "<b>") {
		return Post{}, false, nil
	}

	// Rows whose number column is not an integer are not regular posts.
	noticeID := entry.Find("td.gall_num").Text()
	if _, err := strconv.Atoi(noticeID); err != nil {
		return Post{}, false, nil
	}

	titleLink := entry.Find("td.gall_tit a").First()
	link, found := titleLink.Attr("href")
	if !found {
		return Post{}, false, errors.New("unable to find link")
	}
	// Placeholder links do not point at a post.
	if strings.Contains(link, "javascript:;") {
		return Post{}, false, nil
	}
	if !strings.HasPrefix(link, "http") {
		link = baseURL + link
	}
	parsedLink, err := url.Parse(link)
	if err != nil {
		return Post{}, false, err
	}
	// remove page and exception_mode from final url
	query := parsedLink.Query()
	query.Del("page")
	query.Del("exception_mode")
	parsedLink.RawQuery = query.Encode()

	// Prefer the date cell's title attribute and fall back to its text.
	dateCell := entry.Find("td.gall_date")
	dateText, _ := dateCell.Attr("title")
	if dateText == "" {
		dateText = dateCell.Text()
	}
	// The date is best-effort: an unparsable value leaves Date at its zero
	// value instead of failing the whole listing.
	var date time.Time
	if dateText != "" {
		if parsed, parseErr := parseDate(dateText); parseErr == nil {
			date = parsed
		}
	}

	// Counters are best-effort as well: non-numeric text yields 0.
	hits, _ := strconv.Atoi(entry.Find("td.gall_count").Eq(0).Text())

	// Read the second td.gall_recommend cell when the row has one; otherwise
	// fall back to the first, so rows carrying a single recommend cell do not
	// always report 0 votes.
	recommendCells := entry.Find("td.gall_recommend")
	votesCell := recommendCells.Eq(1)
	if votesCell.Length() == 0 {
		votesCell = recommendCells.First()
	}
	votes, _ := strconv.Atoi(votesCell.Text())

	return Post{
		ID:      noticeID,
		Subject: subjectCell.Text(),
		Title:   titleLink.Text(),
		Author:  entry.Find("td.gall_writer .nickname").Text(),
		Date:    date,
		Hits:    hits,
		Votes:   votes,
		URL:     parsedLink.String(),
	}, true, nil
}