scraper: add SwallowBay native scraper (xbapps#1715)
Co-authored-by: crwxaj <crwxaj>
crwxaj authored May 3, 2024
1 parent 46cca32 commit f10bdc9
Showing 2 changed files with 144 additions and 6 deletions.
6 changes: 0 additions & 6 deletions pkg/config/scrapers.json
@@ -367,12 +367,6 @@
"company": "SuckMeVR",
"avatar_url": "https://cdn-vr.sexlikereal.com/images/studio_creatives/logotypes/1/403/logo_crop_1657112082.png"
},
{
"url": "https://www.sexlikereal.com/studios/swallowbay",
"name": "SwallowBay",
"company": "SwallowBay",
"avatar_url": "https://mcdn.vrporn.com/files/20210330092926/swallowbay-400x400.jpg"
},
{
"url": "https://www.sexlikereal.com/studios/sweetlonglips",
"name": "Sweetlonglips",
144 changes: 144 additions & 0 deletions pkg/scrape/swallowbay.go
@@ -0,0 +1,144 @@
package scrape

import (
	"encoding/json"
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"github.com/gocolly/colly/v2"
	"github.com/nleeper/goment"
	"github.com/thoas/go-funk"
	"github.com/xbapps/xbvr/pkg/models"
)

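// SwallowBay scrapes swallowbay.com and emits ScrapedScene records on the
// out channel; the signature is the entrypoint shape expected by
// registerScraper below.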
func SwallowBay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singleScrapeAdditionalInfo string, limitScraping bool) error {
defer wg.Done()
scraperID := "swallowbay"
siteID := "SwallowBay"
logScrapeStart(scraperID, siteID)

sceneCollector := createCollector("swallowbay.com")
siteCollector := createCollector("swallowbay.com")
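	// siteCollector walks listing and pagination pages; sceneCollector
	// parses the individual scene pages discovered there.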

sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
sc := models.ScrapedScene{}
sc.ScraperID = scraperID
sc.SceneType = "VR"
sc.Studio = "SwallowBay"
sc.Site = siteID
sc.HomepageURL = e.Request.URL.String()

		// Scene ID: the numeric suffix of the URL path, e.g. "...-123.html".
		regexpSceneID := regexp.MustCompile(`-(\d+)\.html$`)
		if m := regexpSceneID.FindStringSubmatch(e.Request.URL.Path); len(m) > 1 {
			sc.SiteID = m[1]
		}

// Title
e.ForEach(`div.content-title h1`, func(id int, e *colly.HTMLElement) {
sc.Title = strings.TrimSpace(e.Text)
})

// Cover URLs
e.ForEach(`dl8-video`, func(id int, e *colly.HTMLElement) {
coverUrl := e.Attr("poster")
sc.Covers = append(sc.Covers, coverUrl)
})

// Cast
sc.ActorDetails = make(map[string]models.ActorDetails)
e.ForEach(`div.content-models a`, func(id int, e *colly.HTMLElement) {
			// Use one canonical name so the Cast list and ActorDetails keys match.
			name := strings.TrimSpace(e.Attr("title"))
			if name != "" {
				sc.Cast = append(sc.Cast, name)
				sc.ActorDetails[name] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Attr("href")}
			}
})

// Tags
		// Skip format/technology tags that don't describe the scene itself.
		ignoreTags := []string{"vr 180", "vr 6k", "8k", "iphone", "ultra high definition"}
		e.ForEach(`div.content-tags a`, func(id int, e *colly.HTMLElement) {
			tag := strings.ToLower(strings.TrimSpace(e.Text))
			if tag != "" && !funk.ContainsString(ignoreTags, tag) {
				sc.Tags = append(sc.Tags, tag)
			}
		})

		// Synopsis
e.ForEach(`div.content-desc.active`, func(id int, e *colly.HTMLElement) {
			sc.Synopsis = strings.TrimSpace(e.Text)
})

// Release date
e.ForEach(`div.content-data div.content-date`, func(id int, e *colly.HTMLElement) {
			// The label "Date: " precedes the value, so strip it as a prefix.
			date := strings.TrimPrefix(strings.TrimSpace(e.Text), "Date: ")
			tmpDate, _ := goment.New(strings.TrimSpace(date), "Do MMM, YYYY")
sc.Released = tmpDate.Format("YYYY-MM-DD")
})

// Duration
e.ForEach(`div.content-data div.content-time`, func(id int, e *colly.HTMLElement) {
parts := strings.Split(e.Text, ":")
if len(parts) > 1 {
tmpDuration, err := strconv.Atoi(strings.TrimSpace(parts[1]))
if err == nil {
sc.Duration = tmpDuration
}
}
})

// No filename information yet

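		// Trailers: collect every <source> of the dl8-video player and
		// store them as a JSON-encoded VideoSourceResponse.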
sc.TrailerType = "urls"
var trailers []models.VideoSource
e.ForEach(`dl8-video source`, func(id int, e *colly.HTMLElement) {
trailers = append(trailers, models.VideoSource{URL: e.Attr("src"), Quality: strings.TrimSpace(e.Attr("quality"))})
})
trailerJson, _ := json.Marshal(models.VideoSourceResponse{VideoSources: trailers})
sc.TrailerSrc = string(trailerJson)

if sc.SiteID != "" {
sc.SceneID = fmt.Sprintf("swallowbay-%v", sc.SiteID)

// save only if we got a SceneID
out <- sc
}
})

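	// Follow pagination links unless limitScraping restricts the crawl.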
siteCollector.OnHTML(`div.pagination a`, func(e *colly.HTMLElement) {
if !limitScraping {
pageURL := e.Request.AbsoluteURL(e.Attr("href"))
siteCollector.Visit(pageURL)
}
})

siteCollector.OnHTML(`div.-video div.item-name a`, func(e *colly.HTMLElement) {
sceneURL := e.Request.AbsoluteURL(e.Attr("href"))

		// If the scene already exists in the database, there's no need to scrape it again
if !funk.ContainsString(knownScenes, sceneURL) {
sceneCollector.Visit(sceneURL)
}
})

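	// A non-empty singleSceneURL scrapes just that scene; otherwise start
	// the full crawl from the most-recent listing.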
if singleSceneURL != "" {
sceneCollector.Visit(singleSceneURL)
} else {
siteCollector.Visit("https://swallowbay.com/most-recent/")
}

if updateSite {
updateSiteLastUpdate(scraperID)
}
logScrapeFinished(scraperID, siteID)
return nil
}

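// Registration wires the scraper into XBVR: scraper ID, display name,
// avatar icon URL, site domain, and the scrape function.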
func init() {
registerScraper("swallowbay", "SwallowBay", "https://swallowbay.com/templates/swallowbay/images/favicons/apple-icon-180x180.png", "swallowbay.com", SwallowBay)
}
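
For context, a minimal hypothetical driver for a scraper with this signature, assuming the xbvr packages are importable; an empty singleSceneURL selects the full-site crawl rather than a single scene:

package main

import (
	"fmt"
	"sync"

	"github.com/xbapps/xbvr/pkg/models"
	"github.com/xbapps/xbvr/pkg/scrape"
)

func main() {
	// Hypothetical driver for illustration; in the real app XBVR's own
	// runner invokes registered scrapers and supplies these arguments.
	var wg sync.WaitGroup
	out := make(chan models.ScrapedScene, 16)

	wg.Add(1)
	// updateSite=false, no known scenes, no single-scene URL, no page limit.
	go scrape.SwallowBay(&wg, false, nil, out, "", "", false)

	// Close the channel once the scraper finishes so the range loop ends.
	go func() {
		wg.Wait()
		close(out)
	}()

	for sc := range out {
		fmt.Println(sc.SceneID, sc.Title) // XBVR itself would persist these
	}
}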
