Skip to content

Commit

Permalink
Merge branch 'xbapps:master' into impatient
Browse files Browse the repository at this point in the history
  • Loading branch information
vt-idiot authored Jun 10, 2023
2 parents aa5da50 + 047d145 commit d7c93b5
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 131 deletions.
129 changes: 76 additions & 53 deletions pkg/scrape/sexbabesvr.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
package scrape

import (
"encoding/json"
"net/url"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"

"github.com/gocolly/colly/v2"
"github.com/mozillazg/go-slugify"
"github.com/nleeper/goment"
"github.com/thoas/go-funk"
"github.com/xbapps/xbvr/pkg/models"
)

var currentYear int
var lastMonth int

func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene) error {
defer wg.Done()
scraperID := "sexbabesvr"
Expand All @@ -30,75 +36,54 @@ func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c
sc.Site = siteID
sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]

// Scene ID - get from URL
tmp := strings.Split(sc.HomepageURL, "/")
tmp2 := strings.Split(tmp[len(tmp)-1], "-")[0]
sc.SiteID = strings.Replace(tmp2, "vrh", "", -1)
sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
// Scene ID -
e.ForEach(`dl8-video`, func(id int, e *colly.HTMLElement) {
sc.SiteID = e.Attr("data-scene")
sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
sc.Covers = append(sc.Covers, e.Attr("poster"))
})

// Title
e.ForEach(`h1.title`, func(id int, e *colly.HTMLElement) {
e.ForEach(`div.video-detail__description--container h1`, func(id int, e *colly.HTMLElement) {
sc.Title = strings.TrimSpace(e.Text)
})

// Cover URLs
e.ForEach(`div.splash-screen`, func(id int, e *colly.HTMLElement) {
base := e.Attr("style")
base = strings.Split(base, "background-image: url(")[1]
base = strings.Split(base, ");")[0]
base = strings.Split(base, "?")[0]
sc.Covers = append(sc.Covers, base)
})

// Gallery
e.ForEach(`figure[itemprop=associatedMedia] > a`, func(id int, e *colly.HTMLElement) {
base := e.Request.AbsoluteURL(e.Attr("href"))
base = strings.Split(base, "?")[0]
sc.Gallery = append(sc.Gallery, base)
e.ForEach(`.gallery-slider img`, func(id int, e *colly.HTMLElement) {
sc.Gallery = append(sc.Gallery, e.Request.AbsoluteURL(e.Attr("src")))
})

// Synopsis
e.ForEach(`.video-group-bottom`, func(id int, e *colly.HTMLElement) {
sc.Synopsis = strings.TrimSpace(e.Text)
e.ForEach(`div.video-detail>div.container>p`, func(id int, e *colly.HTMLElement) {
// Handle blank <p></p> surrounding the synopsis
if strings.TrimSpace(e.Text) != "" {
sc.Synopsis = strings.TrimSpace(e.Text)
}
})

// Tags
e.ForEach(`.video-tags a`, func(id int, e *colly.HTMLElement) {
e.ForEach(`.tags a.tag`, func(id int, e *colly.HTMLElement) {
sc.Tags = append(sc.Tags, strings.TrimSpace(e.Text))
})

// trailer details
sc.TrailerType = "deovr"
sc.TrailerSrc = "http://sexbabesvr.com/deovr/video/id/" + sc.SiteID
sc.TrailerType = "scrape_html"
params := models.TrailerScrape{SceneUrl: sc.HomepageURL, HtmlElement: "dl8-video source", ContentPath: "src", QualityPath: "quality"}
strParams, _ := json.Marshal(params)
sc.TrailerSrc = string(strParams)

// Cast
e.ForEach(`div.video-actress-name a`, func(id int, e *colly.HTMLElement) {
e.ForEach(`div.video-detail__description--author a`, func(id int, e *colly.HTMLElement) {
sc.Cast = append(sc.Cast, strings.TrimSpace(e.Text))
})

// Date
e.ForEach(`div.video-info span.date-display-single`, func(id int, e *colly.HTMLElement) {
tmpDate, _ := goment.New(e.Text, "MMM DD, YYYY")
sc.Released = tmpDate.Format("YYYY-MM-DD")
})
sc.Released = e.Request.Ctx.Get("released")

// Duration
e.ForEach(`div.video-additional div.c-grid-one-time span`, func(id int, e *colly.HTMLElement) {
durationText := strings.TrimSpace(strings.Replace(e.Text, "min.", "", -1))
tmpDuration, err := strconv.Atoi(durationText)
if err == nil {
sc.Duration = tmpDuration
} else {
tmpParts := strings.Split(durationText, ":")
if len(tmpParts) == 3 {
hours, _ := strconv.Atoi(tmpParts[0])
minutes, _ := strconv.Atoi(tmpParts[1])
sc.Duration = hours*60 + minutes
}
}
})

// Filenames
// old site, needs update
e.ForEach(`div.modal a.vd-row`, func(id int, e *colly.HTMLElement) {
origURL, _ := url.Parse(e.Attr("href"))
base := origURL.Query().Get("response-content-disposition")
Expand All @@ -113,21 +98,59 @@ func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c
out <- sc
})

siteCollector.OnHTML(`div.c-pagination li.next a`, func(e *colly.HTMLElement) {
siteCollector.OnHTML(`a.pagination__button`, func(e *colly.HTMLElement) {
pageURL := e.Request.AbsoluteURL(e.Attr("href"))
siteCollector.Visit(pageURL)
})

siteCollector.OnHTML(`div.cf div.c-grid-one-hov-field a`, func(e *colly.HTMLElement) {
sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
type video struct {
url string
released string
}
videoList := make(map[int]video)

siteCollector.OnHTML(`div.videos__content`, func(e *colly.HTMLElement) {
e.ForEach(`a.video-container__description--information`, func(cnt int, e *colly.HTMLElement) {
sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
var re = regexp.MustCompile(`(?m)(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{2}`)
match := re.FindAllString(e.Text, -1)

if len(match) > 0 {
// If scene exist in database, there's no need to scrape
page, _ := strconv.Atoi(strings.ReplaceAll(e.Request.URL.String(), "https://sexbabesvr.com/videos/", ""))
videoList[page*1000+cnt] = video{url: sceneURL, released: match[0]}
}
})
})

currentYear = time.Now().Year()
lastMonth = int(time.Now().Month())

siteCollector.Visit("https://sexbabesvr.com/videos")

// If scene exist in database, there's no need to scrape
if !funk.ContainsString(knownScenes, sceneURL) {
sceneCollector.Visit(sceneURL)
// Sort the videoList as page visits may not return in the same speed and be out of order
var sortedVideos []int
for key := range videoList {
sortedVideos = append(sortedVideos, key)
}
sort.Ints(sortedVideos)

for _, seq := range sortedVideos {
ctx := colly.NewContext()
tmpDate, _ := time.Parse("Jan 02", videoList[seq].released)
if tmpDate.Month() == 12 && lastMonth == 1 {
currentYear -= 1
} else if tmpDate.Month() == 1 && lastMonth == 12 {
currentYear += 1
}
})
tmpDate = tmpDate.AddDate(currentYear-tmpDate.Year(), 0, 0)
lastMonth = int(tmpDate.Month())
ctx.Put("released", tmpDate.Format("2006-01-02"))

siteCollector.Visit("https://sexbabesvr.com/virtualreality/list")
if !funk.ContainsString(knownScenes, videoList[seq].url) {
sceneCollector.Request("GET", videoList[seq].url, nil, ctx, nil)
}
}

if updateSite {
updateSiteLastUpdate(scraperID)
Expand All @@ -137,5 +160,5 @@ func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c
}

func init() {
registerScraper("sexbabesvr", "SexBabesVR", "https://sexbabesvr.com/s/images/favicons/apple-touch-icon.png", SexBabesVR)
registerScraper("sexbabesvr", "SexBabesVR", "https://sexbabesvr.com/assets/front/assets/logo.png", SexBabesVR)
}
Loading

0 comments on commit d7c93b5

Please sign in to comment.