From dbb8a0099e538781acfdf69f90ed0e9232fb6236 Mon Sep 17 00:00:00 2001 From: toshski <104477758+toshski@users.noreply.github.com> Date: Sun, 31 Dec 2023 00:52:38 +1300 Subject: [PATCH] scraper: fix BaberoticaVR scraper (#1544) * Fixes Actor & Single Scene Scrapes * Adds description scraping --- pkg/migrations/migrations.go | 7 +++++++ pkg/scrape/baberoticavr.go | 19 ++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/pkg/migrations/migrations.go b/pkg/migrations/migrations.go index 13c0647de..c8f20da7d 100644 --- a/pkg/migrations/migrations.go +++ b/pkg/migrations/migrations.go @@ -1833,6 +1833,13 @@ func Migrate() { return nil }, }, + { + ID: "0072-Update-Baberotica-Studio", + Migrate: func(tx *gorm.DB) error { + sql := `update scenes set studio = 'Baberotica' where site = 'BaberoticaVR' and studio <> 'Baberotica';` + return tx.Exec(sql).Error + }, + }, }) if err := m.Migrate(); err != nil { diff --git a/pkg/scrape/baberoticavr.go b/pkg/scrape/baberoticavr.go index 7c5011e01..eef0a17fa 100644 --- a/pkg/scrape/baberoticavr.go +++ b/pkg/scrape/baberoticavr.go @@ -10,6 +10,7 @@ import ( "sync" "github.com/go-resty/resty/v2" + "github.com/gocolly/colly/v2" "github.com/gosimple/slug" "github.com/thoas/go-funk" "github.com/xbapps/xbvr/pkg/models" @@ -20,6 +21,15 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out scraperID := "baberoticavr" siteID := "BaberoticaVR" logScrapeStart(scraperID, siteID) + additionalDetailCollector := createCollector("baberoticavr.com") + + additionalDetailCollector.OnHTML(`html`, func(e *colly.HTMLElement) { + sc := e.Request.Ctx.GetAny("scene").(models.ScrapedScene) + e.ForEach(`div.videoinfo>p`, func(id int, e *colly.HTMLElement) { + sc.Synopsis = strings.TrimSpace(e.Text) + }) + out <- sc + }) resp, err := resty.New().R(). SetHeader("User-Agent", UserAgent). @@ -76,7 +86,7 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out for _, row := range data { sc := models.ScrapedScene{} sceneURL := row[3] - if funk.ContainsString(knownScenes, sceneURL) { + if funk.ContainsString(knownScenes, sceneURL) && sceneURL != singleSceneURL { continue } @@ -129,8 +139,6 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out url := "https://baberoticavr.com/model/" + slug.Make(actor) + "/" sc.ActorDetails[actor] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: url} } - sc.Studio = actor - break } // trailer details @@ -143,7 +151,9 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out sc.SceneID = fmt.Sprintf("baberoticavr-%v", sc.SiteID) } - out <- sc + ctx := colly.NewContext() + ctx.Put("scene", sc) + additionalDetailCollector.Request("GET", sc.HomepageURL, nil, ctx, nil) } if updateSite { @@ -152,7 +162,6 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out logScrapeFinished(scraperID, siteID) return nil } - func init() { registerScraper("baberoticavr", "BaberoticaVR", "https://baberoticavr.com/wp-content/themes/baberoticavr/images/fav/android-chrome-192x192.png", "baberoticavr.com", BaberoticaVR) }