Skip to content

Commit

Permalink
Merge pull request #15 from kkdai/url_title
Browse files Browse the repository at this point in the history
Implement getting the title from the target URL.
  • Loading branch information
kkdai authored Nov 8, 2021
2 parents b4d418f + b54d6da commit 2331dc3
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
19 changes: 19 additions & 0 deletions ptt.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,25 @@ func (p *PTT) GetUrlPhotos(target string) []string {
return resultSlice
}

// GetUrlTitle fetches the page at target and returns the text of the
// ".article-meta-value" element belonging to the ".article-metaline" entry
// whose ".article-meta-tag" text contains "標題" (title).
//
// It returns "" when the document cannot be built from the response (the
// error is logged) or when no such entry exists. If several entries match,
// the last one wins.
func (p *PTT) GetUrlTitle(target string) string {
	// The response is requested through getResponseWithCookie, which the
	// original code describes as setting the over18=1 cookie.
	doc, err := goquery.NewDocumentFromResponse(getResponseWithCookie(target))
	if err != nil {
		log.Println(err)
		return ""
	}

	var title string
	doc.Find(".article-metaline").Each(func(_ int, line *goquery.Selection) {
		tag := line.Find(".article-meta-tag").Text()
		if !strings.Contains(tag, "標題") {
			return
		}
		title = line.Find(".article-meta-value").Text()
	})
	return title
}

func (p *PTT) Crawler(target string, workerNum int) {
// Get https response with setting cookie over18=1
resp := getResponseWithCookie(target)
Expand Down
18 changes: 16 additions & 2 deletions ptt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,30 @@ func TestGetNumber(t *testing.T) {

// TestURLPhoto calls ParsePttByNumber(6, 0) and, when the title of the post
// at index 5 is accepted by CheckTitleWithBeauty, verifies that the post URL
// passes HasValidURL and that GetUrlPhotos returns at least one entry for it.
func TestURLPhoto(t *testing.T) {
	ptt := NewPTT()
	ptt.ParsePttByNumber(6, 0)
	title := ptt.GetPostTitleByIndex(5)
	// For any title that CheckTitleWithBeauty rejects, the test passes
	// without making assertions.
	if CheckTitleWithBeauty(title) {
		url := ptt.GetPostUrlByIndex(5)
		ret := ptt.GetUrlPhotos(url)
		// Each failure is reported exactly once, prefixed with the test name.
		if !ptt.HasValidURL(url) {
			t.Errorf("TestURLPhoto: URL is not correct")
		}

		if len(ret) == 0 {
			t.Errorf("TestURLPhoto: No result")
		}
	}
}

// TestURLTitle calls ParsePttByNumber(6, 0) and, when the title of the post
// at index 5 is accepted by CheckTitleWithBeauty, verifies that GetUrlTitle
// returns a non-empty title for that post's URL which CheckTitleWithBeauty
// also accepts.
func TestURLTitle(t *testing.T) {
	ptt := NewPTT()
	ptt.ParsePttByNumber(6, 0)
	title := ptt.GetPostTitleByIndex(5)
	// Nothing is asserted for titles that CheckTitleWithBeauty rejects.
	if !CheckTitleWithBeauty(title) {
		return
	}

	urlTitle := ptt.GetUrlTitle(ptt.GetPostUrlByIndex(5))
	if urlTitle != "" && CheckTitleWithBeauty(urlTitle) {
		return
	}
	t.Errorf("TestURLTitle: title is not correct url_title=%s title=%s\n", urlTitle, title)
}
Expand Down

0 comments on commit 2331dc3

Please sign in to comment.