-
Notifications
You must be signed in to change notification settings - Fork 0
/
base.go
117 lines (101 loc) · 2.45 KB
/
base.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package photomgr
import (
"image"
"image/gif"
"image/jpeg"
"image/png"
"log"
"net/http"
"os"
"path/filepath"
"regexp"
"sync"
)
// baseCrawler holds the state used by the methods in this file: two address
// strings and the posts kept from the most recent page parse.
type baseCrawler struct {
// Address fields; the original note says they are initialised by the
// type that builds on baseCrawler (nothing in this file assigns them).
baseAddress string
entryAddress string
// Posts stored from the current parse result. The Get*ByIndex accessors
// and GetCurrentPageResultCount read from this slice.
storedPost []PostDoc
}
// Patterns compiled once at package scope and shared by the crawler methods.
var (
// threadId is the pattern HasValidURL searches for inside a URL. Both `.`
// are unescaped, so each matches any single character, and `\d*` also
// matches zero digits.
// NOTE(review): the dots look like they were meant to be literal
// (`M\.(\d+)\.`) — confirm against real URLs before tightening.
threadId = regexp.MustCompile(`M.(\d*).`)
// imageId captures a file name (group 1) and its extension (group 2) for
// names ending in .png or .jpg; worker uses both groups to build the
// output file name.
// NOTE(review): worker also has a "gif" case, which this pattern can
// never produce — confirm whether gif should be accepted here.
imageId = regexp.MustCompile(`([^\/]+)\.(png|jpg)`)
)
// HasValidURL reports whether url contains a match for the threadId pattern.
func (b *baseCrawler) HasValidURL(url string) bool {
	matched := threadId.MatchString(url)
	return matched
}
// GetCurrentPageResultCount returns the number of posts currently stored from
// page parsing. It is 0 while no posts are stored.
func (b *baseCrawler) GetCurrentPageResultCount() int {
	posts := b.storedPost
	return len(posts)
}
// GetPostTitleByIndex returns the title of the post at postIndex in the
// currently stored page result. It returns "" when postIndex is negative or
// not smaller than the number of stored posts.
func (b *baseCrawler) GetPostTitleByIndex(postIndex int) string {
	// The lower bound matters: a negative index passes an upper-bound-only
	// check and would panic on the slice access below.
	if postIndex < 0 || postIndex >= len(b.storedPost) {
		return ""
	}
	return b.storedPost[postIndex].ArticleTitle
}
// GetPostUrlByIndex returns the URL of the post at postIndex in the currently
// stored page result. It returns "" when postIndex is negative or not smaller
// than the number of stored posts.
func (b *baseCrawler) GetPostUrlByIndex(postIndex int) string {
	// The lower bound matters: a negative index passes an upper-bound-only
	// check and would panic on the slice access below.
	if postIndex < 0 || postIndex >= len(b.storedPost) {
		return ""
	}
	return b.storedPost[postIndex].URL
}
// GetPostStarByIndex returns the like count of the post at postIndex in the
// currently stored page result. It returns 0 when postIndex is negative or
// not smaller than the number of stored posts.
func (b *baseCrawler) GetPostStarByIndex(postIndex int) int {
	// The lower bound matters: a negative index passes an upper-bound-only
	// check and would panic on the slice access below.
	if postIndex < 0 || postIndex >= len(b.storedPost) {
		return 0
	}
	return b.storedPost[postIndex].Likeint
}
// exists reports whether path can be stat'ed.
//
// It returns (true, nil) when os.Stat succeeds and (false, nil) when the
// error satisfies os.IsNotExist. Any other stat error is returned together
// with true, so callers must check the error before trusting the boolean.
func exists(path string) (bool, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return true, statErr
	}
}
// worker receives image URLs from linkChan until the channel is closed and
// tries to save each one under destDir. It calls wg.Done when it returns.
//
// A failure on one URL is logged and that URL is skipped; it never stops the
// worker.
func (b *baseCrawler) worker(destDir string, linkChan chan string, wg *sync.WaitGroup) {
	defer wg.Done()
	for target := range linkChan {
		// Each URL is handled in its own call so the deferred Close calls run
		// per URL instead of accumulating until linkChan is closed.
		b.downloadImage(destDir, target)
	}
}

// downloadImage fetches target, decodes it, and re-encodes it into destDir
// when both dimensions exceed 300 pixels. The output file name and format come
// from the name/extension pair that imageId finds in target. Every failure is
// logged and ends the attempt for this URL.
func (b *baseCrawler) downloadImage(destDir, target string) {
	// Without a name/extension match there is nothing to name the file after,
	// so bail out before spending a request (indexing a nil match would panic).
	imgInfo := imageId.FindStringSubmatch(target)
	if len(imgInfo) < 3 {
		log.Printf("imageId.FindStringSubmatch\nerror: no image file name found\ntarget: %s\n", target)
		return
	}

	resp, err := http.Get(target)
	if err != nil {
		log.Printf("Http.Get\nerror: %s\ntarget: %s\n", err, target)
		return
	}
	defer resp.Body.Close()

	// image.Decode already recognises PNG, JPEG and GIF because this file
	// imports those packages. Retrying with png.Decode on the same,
	// partly consumed body cannot succeed, so there is no fallback.
	m, _, err := image.Decode(resp.Body)
	if err != nil {
		log.Printf("image.Decode\nerror: %s\ntarget: %s", err, target)
		return
	}

	// Ignore small images
	size := m.Bounds().Size()
	if size.X <= 300 || size.Y <= 300 {
		return
	}

	finalPath := filepath.Join(destDir, imgInfo[1]+"."+imgInfo[2])
	out, err := os.Create(finalPath)
	if err != nil {
		log.Printf("os.Create\nerror: %s\n", err)
		return
	}
	defer func() {
		// A failed Close can mean the data never reached disk.
		if cerr := out.Close(); cerr != nil {
			log.Printf("out.Close\nerror: %s\ntarget: %s\n", cerr, target)
		}
	}()

	var encErr error
	switch imgInfo[2] {
	case "jpg":
		encErr = jpeg.Encode(out, m, nil)
	case "png":
		encErr = png.Encode(out, m)
	case "gif":
		encErr = gif.Encode(out, m, nil)
	}
	if encErr != nil {
		log.Printf("image encode\nerror: %s\ntarget: %s\n", encErr, target)
	}
}