Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ability to Limit the Number of Scrapers Running Concurrently #1664

Merged
merged 2 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,18 @@ left arrow - cycles backwards in gallery
right arrow - cycles forward in gallery
esc - closes details pane

#### using Command Line Arguments
| Command line parameter | Type | Description |
|------------------------|-------|--------------
| `--enableLocalStorage` | boolean |Use local folder to store application data|
| `--app_dir` | String|path to the application directory|
| `--cache_dir` | String|path to the tempoarary scraper cache directory|
| `--imgproxy_dir` | String|path to the imageproxy directory|
| `--search_dir` | String| path to the Search Index directory|
| `--preview_dir` | String| path to the Scraper Cache directory|
| `--scriptsheatmap_dir` | String| path to the scripts_heatmap directory|
| `--myfiles_dir` | String| path to the myfiles directory for serving users own content (eg images|
| `--databaseurl` | String|override default database path|
| `--web_port` | Int| override default Web Page port 9999|
| `--ws_addr` | String| override default Websocket address from the default 0.0.0.0:9998|
#### using Command Line Arguments/Environment Variables
| Command line parameter | Environment Variable | Type | Description |
|------------------------|--------------|------|-------------|
| `--enableLocalStorage` | | boolean |Use local folder to store application data|
| `--app_dir` | XBVR_APPDIR | String|path to the application directory|
| `--cache_dir` | XBVR_CACHEDIR | String|path to the temporary scraper cache directory|
| `--imgproxy_dir` | XBVR_IMAGEPROXYDIR | String|path to the imageproxy directory|
| `--search_dir` | XBVR_SEARCHDIR | String| path to the Search Index directory|
| `--preview_dir` | XBVR_VIDEOPREVIEWDIR | String| path to the Scraper Cache directory|
| `--scriptsheatmap_dir` | XBVR_SCRIPTHEATMAPDIR | String| path to the scripts_heatmap directory|
| `--myfiles_dir` | XBVR_MYFILESDIR | String| path to the myfiles directory for serving users own content (eg images)|
| `--databaseurl` | DATABASE_URL | String|override default database path|
| `--web_port` | XBVR_WEB_PORT | Int| override default Web Page port 9999|
| `--ws_addr` | XBVR_WS_ADDR | String| override default Websocket address from the default 0.0.0.0:9998|
| `--db_connection_pool_size` | DB_CONNECTION_POOL_SIZE | Int| sets a limit to the number of db connections while scraping|
| `--concurrent_scrapers` | CONCURRENT_SCRAPERS | Int| set the number of scrapers that run concurrently (default 9999)|
1 change: 1 addition & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type EnvConfigSpec struct {
WsAddr string `envconfig:"XBVR_WS_ADDR" required:"false" default:""`
WebPort int `envconfig:"XBVR_WEB_PORT" required:"false" default:"0"`
DBConnectionPoolSize int `envconfig:"DB_CONNECTION_POOL_SIZE" required:"false" default:"0"`
ConcurrentScrapers int `envconfig:"CONCURRENT_SCRAPERS" required:"false" default:"9999"`
}

var EnvConfig EnvConfigSpec
Expand Down
7 changes: 7 additions & 0 deletions pkg/common/paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var MyFilesDir string
var DownloadDir string
var WebPort int
var DBConnectionPoolSize int
var ConcurrentScrapers int

func DirSize(path string) (int64, error) {
var size int64
Expand Down Expand Up @@ -53,6 +54,7 @@ func InitPaths() {
web_port := flag.Int("web_port", 0, "Optional: override default Web Page port 9999")
ws_addr := flag.String("ws_addr", "", "Optional: override default Websocket address from the default 0.0.0.0:9998")
db_connection_pool_size := flag.Int("db_connection_pool_size", 0, "Optional: sets a limit to the number of db connections while scraping")
concurrentSscrapers := flag.Int("concurrent_scrapers", 0, "Optional: sets a limit to the number of concurrent scrapers")

flag.Parse()

Expand Down Expand Up @@ -120,6 +122,11 @@ func InitPaths() {
} else {
DBConnectionPoolSize = EnvConfig.DBConnectionPoolSize
}
if *concurrentSscrapers != 0 {
ConcurrentScrapers = *concurrentSscrapers
} else {
ConcurrentScrapers = EnvConfig.ConcurrentScrapers
}

_ = os.MkdirAll(AppDir, os.ModePerm)
_ = os.MkdirAll(ImgDir, os.ModePerm)
Expand Down
21 changes: 20 additions & 1 deletion pkg/tasks/content.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,31 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect

var wg sync.WaitGroup

sitecnt := 1
concurrent_scrapers := common.ConcurrentScrapers
if concurrent_scrapers == 0 {
concurrent_scrapers = 99999
}
if len(sites) > 0 {
for _, site := range sites {
for _, scraper := range scrapers {
if site.ID == scraper.ID {
wg.Add(1)
go scraper.Scrape(&wg, updateSite, knownScenes, collectedScenes, singleSceneURL, singeScrapeAdditionalInfo, site.LimitScraping)
go func(scraper models.Scraper) {
scraper.Scrape(&wg, updateSite, knownScenes, collectedScenes, singleSceneURL, singeScrapeAdditionalInfo, site.LimitScraping)
var site models.Site
err := site.GetIfExist(scraper.ID)
if err != nil {
log.Error(err)
return
}
site.Save()
}(scraper)

if sitecnt%concurrent_scrapers == 0 { // processing batches of 35 sites
wg.Wait()
}
sitecnt++
}
}
}
Expand Down
Loading