From b54d6dabeef4f59d1dabdca3aceae7d480afb386 Mon Sep 17 00:00:00 2001 From: Evan Lin Date: Mon, 8 Nov 2021 23:11:08 +0800 Subject: [PATCH] implement get title from target url. --- ptt.go | 19 +++++++++++++++++++ ptt_test.go | 18 ++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/ptt.go b/ptt.go index 83f88d4..4d41ab4 100644 --- a/ptt.go +++ b/ptt.go @@ -50,6 +50,25 @@ func (p *PTT) GetUrlPhotos(target string) []string { return resultSlice } +func (p *PTT) GetUrlTitle(target string) string { + // Get https response with setting cookie over18=1 + resp := getResponseWithCookie(target) + doc, err := goquery.NewDocumentFromResponse(resp) + if err != nil { + log.Println(err) + return "" + } + + //Title + articleTitle := "" + doc.Find(".article-metaline").Each(func(i int, s *goquery.Selection) { + if strings.Contains(s.Find(".article-meta-tag").Text(), "標題") { + articleTitle = s.Find(".article-meta-value").Text() + } + }) + return articleTitle +} + func (p *PTT) Crawler(target string, workerNum int) { // Get https response with setting cookie over18=1 resp := getResponseWithCookie(target) diff --git a/ptt_test.go b/ptt_test.go index 6f950d2..edc533f 100644 --- a/ptt_test.go +++ b/ptt_test.go @@ -40,16 +40,30 @@ func TestGetNumber(t *testing.T) { func TestURLPhoto(t *testing.T) { ptt := NewPTT() + ptt.ParsePttByNumber(6, 0) title := ptt.GetPostTitleByIndex(5) if CheckTitleWithBeauty(title) { url := ptt.GetPostUrlByIndex(5) ret := ptt.GetUrlPhotos(url) if !ptt.HasValidURL(url) { - t.Errorf("URLPhoto: URL is not correct") + t.Errorf("TestURLPhoto: URL is not correct") } if len(ret) == 0 { - t.Errorf("URLPhoto: No result") + t.Errorf("TestURLPhoto: No result") + } + } +} + +func TestURLTitle(t *testing.T) { + ptt := NewPTT() + ptt.ParsePttByNumber(6, 0) + title := ptt.GetPostTitleByIndex(5) + if CheckTitleWithBeauty(title) { + url := ptt.GetPostUrlByIndex(5) + urlTitle := ptt.GetUrlTitle(url) + if urlTitle == "" || !CheckTitleWithBeauty(urlTitle) { + t.Errorf("TestURLTitle: title is not correct url_title=%s title=%s\n", urlTitle, title) } } }