Skip to content

Commit

Permalink
Merge pull request #12 from kkdai/refine_struct
Browse files Browse the repository at this point in the history
Refine struct
  • Loading branch information
kkdai authored Nov 5, 2021
2 parents 3e80fa1 + 18144fe commit a9d1e5f
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 54 deletions.
12 changes: 12 additions & 0 deletions PostDoc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package photomgr

// PostDoc describes a single crawled post. Every field carries matching
// json and bson keys through its struct tags.
type PostDoc struct {
// ArticleID is the post's identifier, serialized as "article_id".
ArticleID string `json:"article_id" bson:"article_id"`
// ArticleTitle is the post's title.
ArticleTitle string `json:"article_title" bson:"article_title"`
// Author is presumably the name of the post's author — TODO confirm.
Author string `json:"author" bson:"author"`
// Date is kept as a plain string; its format is not fixed by this type —
// TODO confirm against whatever populates it.
Date string `json:"date" bson:"date"`
// URL is the address of the post.
URL string `json:"url" bson:"url"`
// ImageLinks presumably lists the image URLs found in the post — TODO confirm.
ImageLinks []string `json:"image_links" bson:"image_links"`
// Likeint is the post's like count as an integer.
Likeint int `json:"likeint" bson:"likeint"`
// Dislikeint is presumably the post's dislike count as an integer — TODO confirm.
Dislikeint int `json:"dislikeint" bson:"dislikeint"`
}
20 changes: 9 additions & 11 deletions base.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ type baseCrawler struct {
baseAddress string
entryAddress string

//To store current baseCrawler post result
storedPostURLList []string
storedPostTitleList []string
storedStarList []int
// //To store current baseCrawler post result
storedPost []PostDoc
}

var (
Expand All @@ -36,32 +34,32 @@ func (b *baseCrawler) HasValidURL(url string) bool {

// GetCurrentPageResultCount returns the number of post results currently
// stored on the crawler. It is 0 if no page has been parsed yet.
func (b *baseCrawler) GetCurrentPageResultCount() int {
// NOTE(review): this block comes from a diff view; the first return looks
// like the removed line and the second like its replacement — confirm
// against the commit.
return len(b.storedPostTitleList)
return len(b.storedPost)
}

// GetPostTitleByIndex returns the title of the stored post at postIndex in
// the currently parsed page, or "" when postIndex is at or beyond the end of
// the stored results. Negative indexes are not checked here.
func (b *baseCrawler) GetPostTitleByIndex(postIndex int) string {
// NOTE(review): this block comes from a diff view; each pair of similar
// lines looks like the removed line followed by its replacement — confirm
// against the commit.
if postIndex >= len(b.storedPostTitleList) {
if postIndex >= len(b.storedPost) {
return ""
}
return b.storedPostTitleList[postIndex]
return b.storedPost[postIndex].ArticleTitle
}

// GetPostUrlByIndex returns the URL of the stored post at postIndex in the
// currently parsed page, or "" when postIndex is at or beyond the end of the
// stored results. Negative indexes are not checked here.
func (b *baseCrawler) GetPostUrlByIndex(postIndex int) string {
// NOTE(review): this block comes from a diff view; each pair of similar
// lines looks like the removed line followed by its replacement — confirm
// against the commit.
if postIndex >= len(b.storedPostURLList) {
if postIndex >= len(b.storedPost) {
return ""
}

return b.storedPostURLList[postIndex]
return b.storedPost[postIndex].URL
}

// GetPostStarByIndex returns the like count of the stored post at postIndex
// in the currently parsed page, or 0 when postIndex is at or beyond the end
// of the stored results. Negative indexes are not checked here.
func (b *baseCrawler) GetPostStarByIndex(postIndex int) int {
// NOTE(review): this block comes from a diff view; each pair of similar
// lines looks like the removed line followed by its replacement — confirm
// against the commit.
if postIndex >= len(b.storedStarList) {
if postIndex >= len(b.storedPost) {
return 0
}
return b.storedStarList[postIndex]
return b.storedPost[postIndex].Likeint
}

func exists(path string) (bool, error) {
Expand Down
18 changes: 10 additions & 8 deletions ck101.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ func (p *CK101) ParseCK101PageByIndex(page int) int {
log.Fatal(err)
}

urlList := make([]string, 0)
postList := make([]string, 0)
posts := make([]PostDoc, 0)

var PageWebSide string
page = page + 1 //one base
Expand All @@ -107,14 +106,17 @@ func (p *CK101) ParseCK101PageByIndex(page int) int {
url = fmt.Sprintf("%s/%s", p.baseAddress, goUrl)
})

urlList = append(urlList, url)
postList = append(postList, title)
})
newPost := PostDoc{
ArticleID: "",
ArticleTitle: title,
URL: url,
}

p.storedPostURLList = urlList
p.storedPostTitleList = postList
posts = append(posts, newPost)
})

return len(p.storedPostTitleList)
p.storedPost = posts
return len(p.storedPost)
}

// GetAllImageAddress: return all image address in current page.
Expand Down
21 changes: 10 additions & 11 deletions fb.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,7 @@ func (p *FBAlbum) ParseFBAlbumPageByIndex(page int) int {
log.Fatal(err)
}

urlList := make([]string, 0)
postList := make([]string, 0)
starList := make([]int, 0)
posts := make([]PostDoc, 0)

var PageWebSide string
page = page + 1 //one base
Expand Down Expand Up @@ -107,16 +105,17 @@ func (p *FBAlbum) ParseFBAlbumPageByIndex(page int) int {
}
//}
})
urlList = append(urlList, url)
starList = append(starList, starInt)
postList = append(postList, title)
})

p.storedPostURLList = urlList
p.storedStarList = starList
p.storedPostTitleList = postList
newPost := PostDoc{
ArticleTitle: title,
URL: url,
Likeint: starInt,
}
posts = append(posts, newPost)
})

return len(p.storedPostTitleList)
p.storedPost = posts
return len(p.storedPost)
}

// GetAllImageAddress: return all image address in current page.
Expand Down
41 changes: 17 additions & 24 deletions ptt.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ type PTT struct {

//Handle base folder address to store images
BaseDir string

//To store current PTT post result
storedPostURLList []string
storedPostTitleList []string
storedStarList []int
}

func NewPTT() *PTT {
Expand Down Expand Up @@ -156,32 +151,32 @@ func (p *PTT) GetAllImageAddress(target string) []string {

// GetCurrentPageResultCount returns the number of post results currently
// stored on the PTT crawler. It is 0 if no page has been parsed yet.
func (p *PTT) GetCurrentPageResultCount() int {
// NOTE(review): this block comes from a diff view; the first return looks
// like the removed line and the second like its replacement — confirm
// against the commit.
return len(p.storedPostTitleList)
return len(p.storedPost)
}

// GetPostTitleByIndex returns the title of the stored post at postIndex in
// the currently parsed page, or "" when postIndex is at or beyond the end of
// the stored results. Negative indexes are not checked here.
func (p *PTT) GetPostTitleByIndex(postIndex int) string {
// NOTE(review): this block comes from a diff view; each pair of similar
// lines looks like the removed line followed by its replacement — confirm
// against the commit.
if postIndex >= len(p.storedPostTitleList) {
if postIndex >= len(p.storedPost) {
return ""
}
return p.storedPostTitleList[postIndex]
return p.storedPost[postIndex].ArticleTitle
}

// GetPostUrlByIndex returns the URL of the stored post at postIndex in the
// currently parsed page, or "" when postIndex is at or beyond the end of the
// stored results. Negative indexes are not checked here.
func (p *PTT) GetPostUrlByIndex(postIndex int) string {
// NOTE(review): this block comes from a diff view; each pair of similar
// lines looks like the removed line followed by its replacement — confirm
// against the commit.
if postIndex >= len(p.storedPostURLList) {
if postIndex >= len(p.storedPost) {
return ""
}

return p.storedPostURLList[postIndex]
return p.storedPost[postIndex].URL
}

// GetPostStarByIndex returns the like count of the stored post at postIndex
// in the currently parsed page, or 0 when postIndex is at or beyond the end
// of the stored results. Negative indexes are not checked here.
func (p *PTT) GetPostStarByIndex(postIndex int) int {
// NOTE(review): this block comes from a diff view; each pair of similar
// lines looks like the removed line followed by its replacement — confirm
// against the commit.
if postIndex >= len(p.storedStarList) {
if postIndex >= len(p.storedPost) {
return 0
}
return p.storedStarList[postIndex]
return p.storedPost[postIndex].Likeint
}

//Set Ptt board page index, fetch all post and return article count back
Expand All @@ -193,9 +188,7 @@ func (p *PTT) ParsePttPageByIndex(page int) int {
log.Fatal(err)
}

urlList := make([]string, 0)
postList := make([]string, 0)
starList := make([]int, 0)
posts := make([]PostDoc, 0)

maxPageNumberString := ""
var PageWebSide string
Expand Down Expand Up @@ -231,18 +224,18 @@ func (p *PTT) ParsePttPageByIndex(page int) int {
likeCount, _ := strconv.Atoi(s.Find(".nrec span").Text())
href, _ := s.Find(".title a").Attr("href")
link := p.baseAddress + href
urlList = append(urlList, link)
// log.Printf("%d:[%d★]%s\n", i, likeCount, title)
starList = append(starList, likeCount)
postList = append(postList, title)
newPost := PostDoc{
ArticleID: "",
ArticleTitle: title,
URL: link,
Likeint: likeCount,
}

posts = append(posts, newPost)
}
})

p.storedPostURLList = urlList
p.storedStarList = starList
p.storedPostTitleList = postList

return len(p.storedPostTitleList)
return len(p.storedPost)
}

func getResponseWithCookie(url string) *http.Response {
Expand Down

0 comments on commit a9d1e5f

Please sign in to comment.