Commit

Merge commit 'refs/pull/1808/head' of https://github.com/xbapps/xbvr into impatient
vt-idiot committed Aug 4, 2024
2 parents a03f7f4 + c075215 commit a66d5c4
Showing 2 changed files with 31 additions and 57 deletions.
37 changes: 11 additions & 26 deletions pkg/models/model_external_reference.go
@@ -10,7 +10,6 @@ import (
     "time"

     "github.com/avast/retry-go/v4"
-    "github.com/gocolly/colly/v2"
     "github.com/markphelps/optional"

     "github.com/xbapps/xbvr/pkg/common"
@@ -962,31 +961,17 @@ func (scrapeRules ActorScraperConfig) buildGenericActorScraperRules()

 siteDetails = GenericScraperRuleSet{}
 siteDetails.Domain = "vrspy.com"
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "biography", Selector: `.star-biography-description`})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "image_url", Selector: `.star-photo img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "images", Native: func(e interface{}) []string {
-    html := e.(*colly.HTMLElement)
-    var values []string
-    if mainPhotoURL := html.ChildAttr(`.star-photo img`, `src`); mainPhotoURL != "" {
-        partialURLRegex := regexp.MustCompile(`^(.*)/[^/]+.jpg`)
-        if partialURLMatch := partialURLRegex.FindStringSubmatch(mainPhotoURL); len(partialURLMatch) == 2 {
-            fullURLRegex := regexp.MustCompile(regexp.QuoteMeta(partialURLMatch[1]) + `/[^"]+.jpg`)
-            nuxtData := html.ChildText(`#__NUXT_DATA__`)
-            if imageURLs := fullURLRegex.FindAllString(nuxtData, -1); imageURLs != nil {
-                values = imageURLs
-            }
-        }
-    }
-    return values
-}})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "height", Selector: `.about-me-mobile .stars-params-title:contains("Height:") + .stars-params-value`})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "weight", Selector: `.about-me-mobile .stars-params-title:contains("Weight:") + .stars-params-value`})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "band_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "1"}}}})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "cup_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "2"}}}})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "waist_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "3"}}}})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hip_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "4"}}}})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "nationality", Selector: `.about-me-mobile .stars-params-title:contains("Nationality:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "Lookup Country"}}})
-siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hair_color", Selector: `.about-me-mobile .stars-params-title:contains("Hair Color:") + .stars-params-value`})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "biography", Selector: `.star-bio .show-more-text-container`})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "image_url", Selector: `.avatar img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "images", Selector: `.avatar img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "height", Selector: `.star-info-row-title:contains("Height:") + span`})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "weight", Selector: `.star-info-row-title:contains("Weight:") + span`})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "band_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "1"}}}})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "cup_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "2"}}}})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "waist_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "3"}}}})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hip_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "4"}}}})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "nationality", Selector: `.star-info-row-title:contains("Nationality:") + span`, PostProcessing: []PostProcessing{{Function: "Lookup Country"}}})
+siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hair_color", Selector: `.star-info-row-title:contains("Hair Color:") + span`})
 scrapeRules.GenericActorScrapingConfig["vrspy scrape"] = siteDetails

 siteDetails = GenericScraperRuleSet{}
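Note: the replacement rules above derive band, cup, waist, and hip size from the single "Measurements:" value by reusing one regex with a different capture-group index per field. Below is a minimal standalone sketch of how those groups split such a value; the sample string is made up, and the direct regexp calls merely stand in for whatever the RegexString post-processor does internally.

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // Same pattern the vrspy rules pass to the RegexString post-processor.
    re := regexp.MustCompile(`(\d+)([A-Za-z]*)-(\d+)-(\d+)`)

    // Hypothetical text scraped from `.star-info-row-title:contains("Measurements:") + span`.
    measurements := "34C-25-36"

    m := re.FindStringSubmatch(measurements)
    if m == nil {
        fmt.Println("no match")
        return
    }
    // Group 1 -> band_size, 2 -> cup_size, 3 -> waist_size, 4 -> hip_size,
    // matching the group index each rule selects via Params.
    // The cup group is [A-Za-z]*, so "34-25-36" would still match with an empty cup size.
    fmt.Println("band:", m[1], "cup:", m[2], "waist:", m[3], "hip:", m[4])
    // band: 34 cup: C waist: 25 hip: 36
}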
51 changes: 20 additions & 31 deletions pkg/scrape/vrspy.go
@@ -55,50 +55,39 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-

 sc.SceneID = scraperID + "-" + sc.SiteID

-sc.Title = e.ChildText(`.video-content .header-container .section-header-container`)
-sc.Synopsis = e.ChildText(`.video-description`)
-sc.Tags = e.ChildTexts(`.video-categories .v-chip__content`)
+sc.Title = e.ChildText(`.video-content .header-container .video-title .section-header-container`)
+sc.Synopsis = e.ChildText(`.video-description-container`)
+sc.Tags = e.ChildTexts(`.video-categories .chip`)

-e.ForEach(`.video-details-row`, func(id int, e *colly.HTMLElement) {
-    parts := strings.SplitN(e.Text, ":", 2)
-    key, value := parts[0], parts[1]
-    switch strings.TrimSpace(key) {
-    case "Stars":
-        sc.ActorDetails = make(map[string]models.ActorDetails)
-        e.ForEach(`.stars-list a`, func(id int, e *colly.HTMLElement) {
-            sc.Cast = append(sc.Cast, e.Text)
-            sc.ActorDetails[e.Text] = models.ActorDetails{
-                Source:     scraperID + " scrape",
-                ProfileUrl: e.Request.AbsoluteURL(e.Attr(`href`)),
-            }
-        })
-    case "Duration":
-        durationParts := strings.Split(strings.SplitN(strings.TrimSpace(value), " ", 2)[0], ":")
-        if len(durationParts) == 3 {
-            hours, _ := strconv.Atoi(durationParts[0])
-            minutes, _ := strconv.Atoi(durationParts[1])
-            sc.Duration = hours*60 + minutes
-        }
-    case "Release date":
-        tmpDate, _ := goment.New(strings.TrimSpace(value), "DD MMM YYYY")
-        sc.Released = tmpDate.Format("YYYY-MM-DD")
-    }
-})
+sc.ActorDetails = make(map[string]models.ActorDetails)
+e.ForEach(`.video-actor-item`, func(id int, e *colly.HTMLElement) {
+    sc.Cast = append(sc.Cast, e.Text)
+    e.ForEach(`a`, func(id int, a *colly.HTMLElement) {
+        sc.ActorDetails[e.Text] = models.ActorDetails{
+            Source:     scraperID + " scrape",
+            ProfileUrl: e.Request.AbsoluteURL(a.Attr(`href`)),
+        }
+    })
+})

 var durationParts []string
 // Date & Duration
-e.ForEach(`div.single-video-info__list-item`, func(id int, e *colly.HTMLElement) {
+e.ForEach(`.video-details-info-item`, func(id int, e *colly.HTMLElement) {
     parts := strings.Split(e.Text, ":")
     if len(parts) > 1 {
         switch strings.TrimSpace(parts[0]) {
         case "Release date":
-            tmpDate, _ := goment.New(strings.TrimSpace(parts[1]), "MMM D, YYYY")
+            tmpDate, _ := goment.New(strings.TrimSpace(parts[1]), "DD MMMM YYYY")
             sc.Released = tmpDate.Format("YYYY-MM-DD")
         case "Duration":
             durationParts = strings.Split(strings.TrimSpace(parts[1]), " ")
             tmpDuration, err := strconv.Atoi(durationParts[0])
+            mins := tmpDuration * 60
+            tmpDuration, err = strconv.Atoi(parts[2])
+            mins = mins + tmpDuration
             if err == nil {
-                sc.Duration = tmpDuration
+                sc.Duration = mins
             }
         }
     }
@@ -114,7 +103,7 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
 }

 nuxtData := e.ChildText(`#__NUXT_DATA__`)
-imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `(/photos/[^?"]*\.jpg)\?width`)
+imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `(/photos/[^?"]*\.jpg)`)
 sc.Gallery = imageRegex.FindAllString(nuxtData, -1)

 // trailer details
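Note: in the first hunk of this file, the "Duration" branch now treats the value as hours and minutes split on ":" instead of a plain minute count, and the "Release date" format string changes from "MMM D, YYYY" to "DD MMMM YYYY". Below is a minimal sketch of the new duration arithmetic on a made-up details string; the element text is only a guess at the redesigned layout, where the real code reads it from a .video-details-info-item node.

package main

import (
    "fmt"
    "strconv"
    "strings"
)

func main() {
    // Hypothetical text of one details row on the redesigned page.
    text := "Duration: 1:23"

    parts := strings.Split(text, ":") // ["Duration", " 1", "23"]
    if len(parts) > 2 && strings.TrimSpace(parts[0]) == "Duration" {
        hours, err1 := strconv.Atoi(strings.TrimSpace(parts[1]))
        minutes, err2 := strconv.Atoi(strings.TrimSpace(parts[2]))
        if err1 == nil && err2 == nil {
            // Mirrors the added lines: mins := tmpDuration * 60; mins = mins + tmpDuration
            fmt.Println("sc.Duration =", hours*60+minutes) // sc.Duration = 83
        }
    }
}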
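Note: the second hunk relaxes the gallery regex so image URLs found in the #__NUXT_DATA__ payload no longer need a trailing "?width" query string. A small self-contained sketch with a made-up CDN URL and payload fragment follows; cdnSceneURL is a plain string here, whereas in the scraper it comes from a value with a String() method.

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // Hypothetical CDN scene URL and #__NUXT_DATA__ fragment.
    cdnSceneURL := "https://cdn.example.com/videos/1234"
    nuxtData := `["https://cdn.example.com/videos/1234/photos/1.jpg?width=960","https://cdn.example.com/videos/1234/photos/2.jpg"]`

    // Same shape as the updated expression: no trailing \?width requirement.
    imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL) + `(/photos/[^?"]*\.jpg)`)
    fmt.Println(imageRegex.FindAllString(nuxtData, -1))
    // [https://cdn.example.com/videos/1234/photos/1.jpg https://cdn.example.com/videos/1234/photos/2.jpg]
}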
