From c075215d3ed9d35444c1f20d693341567cee0acf Mon Sep 17 00:00:00 2001 From: toshski Date: Tue, 30 Jul 2024 14:35:34 +1200 Subject: [PATCH] Fix VRSpy Scene & Actor page changes --- pkg/models/model_external_reference.go | 37 ++++++------------- pkg/scrape/vrspy.go | 51 ++++++++++---------------- 2 files changed, 31 insertions(+), 57 deletions(-) diff --git a/pkg/models/model_external_reference.go b/pkg/models/model_external_reference.go index 340c2f9e3..314ef6651 100644 --- a/pkg/models/model_external_reference.go +++ b/pkg/models/model_external_reference.go @@ -10,7 +10,6 @@ import ( "time" "github.com/avast/retry-go/v4" - "github.com/gocolly/colly/v2" "github.com/markphelps/optional" "github.com/xbapps/xbvr/pkg/common" @@ -962,31 +961,17 @@ func (scrapeRules ActorScraperConfig) buildGenericActorScraperRules() { siteDetails = GenericScraperRuleSet{} siteDetails.Domain = "vrspy.com" - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "biography", Selector: `.star-biography-description`}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "image_url", Selector: `.star-photo img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "images", Native: func(e interface{}) []string { - html := e.(*colly.HTMLElement) - var values []string - if mainPhotoURL := html.ChildAttr(`.star-photo img`, `src`); mainPhotoURL != "" { - partialURLRegex := regexp.MustCompile(`^(.*)/[^/]+.jpg`) - if partialURLMatch := partialURLRegex.FindStringSubmatch(mainPhotoURL); len(partialURLMatch) == 2 { - fullURLRegex := regexp.MustCompile(regexp.QuoteMeta(partialURLMatch[1]) + `/[^"]+.jpg`) - nuxtData := html.ChildText(`#__NUXT_DATA__`) - if imageURLs := fullURLRegex.FindAllString(nuxtData, -1); imageURLs != nil { - values = imageURLs - } - } - } - return values - }}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "height", Selector: `.about-me-mobile .stars-params-title:contains("Height:") + .stars-params-value`}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "weight", Selector: `.about-me-mobile .stars-params-title:contains("Weight:") + .stars-params-value`}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "band_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "1"}}}}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "cup_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "2"}}}}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "waist_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "3"}}}}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hip_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "4"}}}}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "nationality", Selector: `.about-me-mobile .stars-params-title:contains("Nationality:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "Lookup Country"}}}) - siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hair_color", Selector: `.about-me-mobile .stars-params-title:contains("Hair Color:") + .stars-params-value`}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "biography", Selector: `.star-bio .show-more-text-container`}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "image_url", Selector: `.avatar img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "images", Selector: `.avatar img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "height", Selector: `.star-info-row-title:contains("Height:") + span`}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "weight", Selector: `.star-info-row-title:contains("Weight:") + span`}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "band_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "1"}}}}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "cup_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "2"}}}}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "waist_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "3"}}}}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hip_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "4"}}}}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "nationality", Selector: `.star-info-row-title:contains("Nationality:") + span`, PostProcessing: []PostProcessing{{Function: "Lookup Country"}}}) + siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hair_color", Selector: `.star-info-row-title:contains("Hair Color:") + span`}) scrapeRules.GenericActorScrapingConfig["vrspy scrape"] = siteDetails siteDetails = GenericScraperRuleSet{} diff --git a/pkg/scrape/vrspy.go b/pkg/scrape/vrspy.go index 6cccd4559..598a5b2d6 100755 --- a/pkg/scrape/vrspy.go +++ b/pkg/scrape/vrspy.go @@ -55,50 +55,39 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- sc.SceneID = scraperID + "-" + sc.SiteID - sc.Title = e.ChildText(`.video-content .header-container .section-header-container`) - sc.Synopsis = e.ChildText(`.video-description`) - sc.Tags = e.ChildTexts(`.video-categories .v-chip__content`) - - e.ForEach(`.video-details-row`, func(id int, e *colly.HTMLElement) { - parts := strings.SplitN(e.Text, ":", 2) - key, value := parts[0], parts[1] - switch strings.TrimSpace(key) { - case "Stars": - sc.ActorDetails = make(map[string]models.ActorDetails) - e.ForEach(`.stars-list a`, func(id int, e *colly.HTMLElement) { - sc.Cast = append(sc.Cast, e.Text) - sc.ActorDetails[e.Text] = models.ActorDetails{ - Source: scraperID + " scrape", - ProfileUrl: e.Request.AbsoluteURL(e.Attr(`href`)), - } - }) - case "Duration": - durationParts := strings.Split(strings.SplitN(strings.TrimSpace(value), " ", 2)[0], ":") - if len(durationParts) == 3 { - hours, _ := strconv.Atoi(durationParts[0]) - minutes, _ := strconv.Atoi(durationParts[1]) - sc.Duration = hours*60 + minutes + sc.Title = e.ChildText(`.video-content .header-container .video-title .section-header-container`) + sc.Synopsis = e.ChildText(`.video-description-container`) + sc.Tags = e.ChildTexts(`.video-categories .chip`) + + sc.ActorDetails = make(map[string]models.ActorDetails) + e.ForEach(`.video-actor-item`, func(id int, e *colly.HTMLElement) { + sc.Cast = append(sc.Cast, e.Text) + e.ForEach(`a`, func(id int, a *colly.HTMLElement) { + sc.ActorDetails[e.Text] = models.ActorDetails{ + Source: scraperID + " scrape", + ProfileUrl: e.Request.AbsoluteURL(a.Attr(`href`)), } - case "Release date": - tmpDate, _ := goment.New(strings.TrimSpace(value), "DD MMM YYYY") - sc.Released = tmpDate.Format("YYYY-MM-DD") - } + + }) }) var durationParts []string // Date & Duration - e.ForEach(`div.single-video-info__list-item`, func(id int, e *colly.HTMLElement) { + e.ForEach(`.video-details-info-item`, func(id int, e *colly.HTMLElement) { parts := strings.Split(e.Text, ":") if len(parts) > 1 { switch strings.TrimSpace(parts[0]) { case "Release date": - tmpDate, _ := goment.New(strings.TrimSpace(parts[1]), "MMM D, YYYY") + tmpDate, _ := goment.New(strings.TrimSpace(parts[1]), "DD MMMM YYYY") sc.Released = tmpDate.Format("YYYY-MM-DD") case "Duration": durationParts = strings.Split(strings.TrimSpace(parts[1]), " ") tmpDuration, err := strconv.Atoi(durationParts[0]) + mins := tmpDuration * 60 + tmpDuration, err = strconv.Atoi(parts[2]) + mins = mins + tmpDuration if err == nil { - sc.Duration = tmpDuration + sc.Duration = mins } } } @@ -114,7 +103,7 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- } nuxtData := e.ChildText(`#__NUXT_DATA__`) - imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `(/photos/[^?"]*\.jpg)\?width`) + imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `(/photos/[^?"]*\.jpg)`) sc.Gallery = imageRegex.FindAllString(nuxtData, -1) // trailer details