Skip to content

Commit

Permalink
scraper: fix BaberoticaVR scraper (#1544)
Browse files Browse the repository at this point in the history
* Fixes Actor & Single Scene Scrapes

* Adds description scraping
  • Loading branch information
toshski authored Dec 30, 2023
1 parent b3325a6 commit dbb8a00
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
7 changes: 7 additions & 0 deletions pkg/migrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -1833,6 +1833,13 @@ func Migrate() {
return nil
},
},
{
// Migration 0072: normalises the studio name of already-stored BaberoticaVR scenes.
ID: "0072-Update-Baberotica-Studio",
Migrate: func(tx *gorm.DB) error {
// Sets studio to 'Baberotica' on every row of scenes whose site is 'BaberoticaVR'
// and whose studio currently holds some other value.
// NOTE(review): under standard SQL comparison rules `studio <> 'Baberotica'` is not
// true for NULL, so rows with a NULL studio would be left unchanged — confirm whether
// such rows can exist.
sql := `update scenes set studio = 'Baberotica' where site = 'BaberoticaVR' and studio <> 'Baberotica';`
// The error from executing the statement (nil on success) is returned as the migration result.
return tx.Exec(sql).Error
},
},
})

if err := m.Migrate(); err != nil {
Expand Down
19 changes: 14 additions & 5 deletions pkg/scrape/baberoticavr.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"

"github.com/go-resty/resty/v2"
"github.com/gocolly/colly/v2"
"github.com/gosimple/slug"
"github.com/thoas/go-funk"
"github.com/xbapps/xbvr/pkg/models"
Expand All @@ -20,6 +21,15 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
scraperID := "baberoticavr"
siteID := "BaberoticaVR"
logScrapeStart(scraperID, siteID)
additionalDetailCollector := createCollector("baberoticavr.com")

additionalDetailCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
sc := e.Request.Ctx.GetAny("scene").(models.ScrapedScene)
e.ForEach(`div.videoinfo>p`, func(id int, e *colly.HTMLElement) {
sc.Synopsis = strings.TrimSpace(e.Text)
})
out <- sc
})

resp, err := resty.New().R().
SetHeader("User-Agent", UserAgent).
Expand Down Expand Up @@ -76,7 +86,7 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
for _, row := range data {
sc := models.ScrapedScene{}
sceneURL := row[3]
if funk.ContainsString(knownScenes, sceneURL) {
if funk.ContainsString(knownScenes, sceneURL) && sceneURL != singleSceneURL {
continue
}

Expand Down Expand Up @@ -129,8 +139,6 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
url := "https://baberoticavr.com/model/" + slug.Make(actor) + "/"
sc.ActorDetails[actor] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: url}
}
sc.Studio = actor
break
}

// trailer details
Expand All @@ -143,7 +151,9 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
sc.SceneID = fmt.Sprintf("baberoticavr-%v", sc.SiteID)
}

out <- sc
ctx := colly.NewContext()
ctx.Put("scene", sc)
additionalDetailCollector.Request("GET", sc.HomepageURL, nil, ctx, nil)
}

if updateSite {
Expand All @@ -152,7 +162,6 @@ func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
logScrapeFinished(scraperID, siteID)
return nil
}

// init hands the BaberoticaVR scrape function to registerScraper, together with
// the scraper's identifier, display name, icon image URL and site domain.
// NOTE(review): the meaning of each positional argument is inferred from the
// literal values; confirm against registerScraper's signature.
func init() {
	const (
		scraperKey  = "baberoticavr"
		displayName = "BaberoticaVR"
		iconURL     = "https://baberoticavr.com/wp-content/themes/baberoticavr/images/fav/android-chrome-192x192.png"
		siteDomain  = "baberoticavr.com"
	)

	registerScraper(scraperKey, displayName, iconURL, siteDomain, BaberoticaVR)
}

0 comments on commit dbb8a00

Please sign in to comment.