-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
90 lines (68 loc) · 1.86 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package main
import (
"context"
"errors"
"fmt"
"os"
"os/signal"
"scrapejsp/scrapejsp"
"sync"
"syscall"
// replace with your own project name
"github.com/tech-engine/goscrapy/cmd/gos"
)
// OnTerminate is a sample helper that demonstrates spider termination.
// It blocks the calling goroutine until the process receives an interrupt
// (SIGINT) or SIGTERM, then invokes fn.
func OnTerminate(fn func()) {
	sigCtx, release := signal.NotifyContext(
		context.Background(),
		os.Interrupt, syscall.SIGINT, syscall.SIGTERM,
	)

	// Park here until one of the registered signals cancels the context.
	<-sigCtx.Done()

	// Unregister the signal notification before running the callback, so the
	// callback executes without this handler still being installed.
	release()

	fn()
}
// main wires the goscrapy core together with this project's middlewares and
// pipelines, runs the core in a background goroutine, issues the spider's
// initial request and then blocks in OnTerminate until a termination signal
// is received. On termination it cancels the shared context and waits for the
// core goroutine to return.
func main() {
	ctx, cancel := context.WithCancel(context.Background())

	// wg tracks the single goroutine that runs the core.
	var wg sync.WaitGroup
	wg.Add(1)

	// get core spider
	// (named core rather than gos so the local does not shadow the gos package)
	core := gos.New[*scrapejsp.Record]()

	// use proxies
	// proxies := gos.WithProxies("proxy_url1", "proxy_url2", ...)
	// get core spider
	// core := gos.New[*scrapejsp.Record]().WithClient(
	// 	gos.DefaultClient(proxies),
	// )

	// use middlewares
	core.MiddlewareManager.Add(scrapejsp.MIDDLEWARES...)

	// use pipelines
	core.PipelineManager.Add(scrapejsp.PIPELINES...)

	// export2Csv := pipelines.Export2CSV[*scrapejsp.Record](pipelines.Export2CSVOpts{
	// 	Filename: "itstimeitsnowornever.csv",
	// })
	// // use export 2 json pipeline
	// export2Json := pipelines.Export2JSON[*scrapejsp.Record](pipelines.Export2JSONOpts{
	// 	Filename: "itstimeitsnowornever.json",
	// 	Immediate: true,
	// })
	// add pipeline to group
	// pipelineGroup := pm.NewGroup[*scrapejsp.Record]()
	// pipelineGroup.Add(export2Csv)
	// pipelineGroup.Add(export2Json)
	// core.PipelineManager.Add(
	// 	pipelineGroup,
	// )

	go func() {
		defer wg.Done()

		err := core.Start(ctx)
		// A nil error, or a cancellation caused by our own shutdown, is not a
		// failure. The previous check only returned for context.Canceled, so a
		// nil error fell through and was printed as "failed: %!q(<nil>)".
		if err == nil || errors.Is(err, context.Canceled) {
			return
		}
		fmt.Printf("failed: %q", err)
	}()

	spider := scrapejsp.NewSpider(core)

	// start the scraper with a job, currently nil is passed but you can pass your job here
	spider.StartRequest(ctx, nil)

	// Block until a termination signal arrives, then cancel the context and
	// wait for the core goroutine to finish before main returns.
	OnTerminate(func() {
		fmt.Println("exit signal received: shutting down gracefully")
		cancel()
		wg.Wait()
	})
}