From 1371217219ec182e03f71b39ac9dafa25bc91118 Mon Sep 17 00:00:00 2001
From: toshski <104477758+toshski@users.noreply.github.com>
Date: Tue, 14 Nov 2023 09:17:29 +1300
Subject: [PATCH] WetVR Scraper changes (#1488)
---
pkg/migrations/migrations.go | 30 +++++++++++++++++
pkg/models/model_scene.go | 2 ++
pkg/scrape/wetvr.go | 57 +++++++++++++++++----------------
ui/src/views/scenes/Filters.vue | 1 +
4 files changed, 63 insertions(+), 27 deletions(-)
diff --git a/pkg/migrations/migrations.go b/pkg/migrations/migrations.go
index ef087c9aa..afa05ed4c 100644
--- a/pkg/migrations/migrations.go
+++ b/pkg/migrations/migrations.go
@@ -1803,6 +1803,36 @@ func Migrate() {
return nil
},
},
+ {
+ ID: "0071-Update-WetVR", // rewrites trailer source, members URL and guessed filenames on already-stored WetVR scenes
+ Migrate: func(tx *gorm.DB) error {
+ var scenes []models.Scene
+ err := tx.Where("site = ?", "WetVR").Find(&scenes).Error
+ if err != nil {
+ return err
+ }
+ for _, scene := range scenes {
+ scene.TrailerType = "scrape_html"
+ scene.TrailerSource = `{"scene_url":"` + scene.SceneURL + `","html_element":"deo-video source","extract_regex":"","content_base_url":"","record_path":"","content_path":"src","encoding_path":"","quality_path":"quality"}`
+ scene.MemberURL = strings.Replace(scene.SceneURL, "https://wetvr.com/", "https://wetvr.com/members/", 1)
+ var filenames []string
+ _ = json.Unmarshal([]byte(scene.FilenamesArr), &filenames) // best effort: on a decode error keep whatever was parsed and still add the new names
+ baseFilename := strings.TrimPrefix(scene.SceneURL, "https://wetvr.com/video/")
+ if !strings.Contains(scene.FilenamesArr, "2700.mp4") { // only extend the list when no 2700.mp4 entry is stored yet
+ filenames = append(filenames, "wetvr-"+baseFilename+"-2700.mp4", "wetvr-"+baseFilename+"-2048.mp4", "wetvr-"+baseFilename+"-1600.mp4", "wetvr-"+baseFilename+"-960.mp4")
+ tmp, marshalErr := json.Marshal(filenames)
+ if marshalErr != nil {
+ return marshalErr
+ }
+ scene.FilenamesArr = string(tmp)
+ }
+ if err := tx.Save(&scene).Error; err != nil { // report write failures instead of silently dropping them
+ return err
+ }
+ }
+ return nil
+ },
+ },
})
if err := m.Migrate(); err != nil {
diff --git a/pkg/models/model_scene.go b/pkg/models/model_scene.go
index 62d8b225e..980607413 100644
--- a/pkg/models/model_scene.go
+++ b/pkg/models/model_scene.go
@@ -1047,6 +1047,8 @@ func queryScenes(db *gorm.DB, r RequestSceneList) (*gorm.DB, *gorm.DB) {
tx = tx.Order("updated_at desc")
case "script_published_desc":
tx = tx.Order("script_published desc")
+ case "scene_id_desc":
+ tx = tx.Order("scene_id desc")
case "random":
if dbConn.Driver == "mysql" {
tx = tx.Order("rand()")
diff --git a/pkg/scrape/wetvr.go b/pkg/scrape/wetvr.go
index 8af34c8c2..e6d864520 100644
--- a/pkg/scrape/wetvr.go
+++ b/pkg/scrape/wetvr.go
@@ -2,8 +2,6 @@ package scrape
import (
"encoding/json"
- "regexp"
- "strconv"
"strings"
"sync"
"time"
@@ -24,61 +22,57 @@ func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
sceneCollector := createCollector("wetvr.com")
siteCollector := createCollector("wetvr.com")
- // RegEx Patterns
- durationRegEx := regexp.MustCompile(`(?i)DURATION:\W(\d+)`)
-
- sceneCollector.OnHTML(`div#t2019`, func(e *colly.HTMLElement) {
+ sceneCollector.OnHTML(`div#trailer_player`, func(e *colly.HTMLElement) { // builds one ScrapedScene per matched scene page and sends it on out
sc := models.ScrapedScene{}
sc.ScraperID = scraperID
sc.SceneType = "VR"
sc.Studio = "WetVR"
sc.Site = siteID
sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]
- sc.MembersUrl = strings.Replace(sc.HomepageURL, "https://wetvr.com/", "https://members.wetvr.com/", 1)
+ sc.MembersUrl = strings.Replace(sc.HomepageURL, "https://wetvr.com/", "https://wetvr.com/members/", 1)
// Scene ID - get from previous page
sc.SiteID = e.Request.Ctx.GetAny("scene-id").(string)
sc.SceneID = slugify.Slugify(sc.Site + "-" + sc.SiteID)
// Title
- sc.Title = strings.TrimSpace(e.ChildText(`h1.t2019-stitle`))
+ sc.Title = strings.TrimSpace(e.ChildText(`div.scene-info h1`))
- // Date
scenedate := e.Request.Ctx.GetAny("scene-date").(string)
if scenedate != "" {
- tmpDate, _ := goment.New(scenedate, "MMMM DD, YYYY")
+ tmpDate, _ := goment.New(scenedate, "MM/DD/YYYY") // date string comes from the request context and is parsed as month/day/year
sc.Released = tmpDate.Format("YYYY-MM-DD")
}
- // Duration
- tmpDuration := durationRegEx.FindStringSubmatch(e.ChildText(`div#t2019-stime`))[1]
- sc.Duration, _ = strconv.Atoi(tmpDuration)
-
// Cover URLs
- coverSrc := e.ChildAttr(`div#t2019-video deo-video`, "cover-image")
+ coverSrc := e.ChildAttr(`div[id="player-wrapper"] deo-video`, "cover-image")
if coverSrc == "" {
- coverSrc = e.ChildAttr(`div#t2019-video img#no-player-image`, "src")
+ if parts := strings.Split(e.ChildAttr(`div[id="no-player-wrapper"] div.bg-cover`, "style"), "background-image: url("); len(parts) > 1 { // a missing element or style yields a single part, so indexing [1] unguarded would panic
+ coverSrc = strings.TrimSuffix(strings.TrimPrefix(parts[1], "'"), "')") // strip the quote and closing paren left around the URL
+ }
}
if coverSrc != "" {
sc.Covers = append(sc.Covers, e.Request.AbsoluteURL(coverSrc))
}
// Gallery
- e.ForEach(`div.t2019-thumbs img`, func(id int, e *colly.HTMLElement) {
+ e.ForEach(`div.items-center a[href="/join" ] img`, func(id int, e *colly.HTMLElement) {
if id > 0 {
sc.Gallery = append(sc.Gallery, e.Request.AbsoluteURL(e.Attr("src")))
}
})
// Synopsis
- sc.Synopsis = strings.TrimSpace(e.ChildText(`div#t2019-description`))
+ sc.Synopsis = strings.TrimSpace(e.ChildText(`div.items-start span`))
// trailer details
- sc.TrailerType = "deovr"
- sc.TrailerSrc = strings.Replace(sc.HomepageURL, "/video/", "/deovr/", 1)
+ sc.TrailerType = "scrape_html"
+ params := models.TrailerScrape{SceneUrl: sc.HomepageURL, HtmlElement: "deo-video source", ContentPath: "src", QualityPath: "quality"}
+ strParams, _ := json.Marshal(params)
+ sc.TrailerSrc = string(strParams) // trailer source is stored as the JSON-encoded scrape parameters
// Cast
- e.ForEach(`div#t2019-models a`, func(id int, e *colly.HTMLElement) {
+ e.ForEach(`a[href^="/models/"]`, func(id int, e *colly.HTMLElement) {
sc.Cast = append(sc.Cast, strings.TrimSpace(e.Text))
})
@@ -86,17 +80,21 @@ func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
// no tags on this site
// Filenames
- // NOTE: no way to guess filename
+ baseFilename := strings.TrimPrefix(sc.HomepageURL, "https://wetvr.com/video/") // the URL slug is the stem of every guessed filename
+ sc.Filenames = append(sc.Filenames, "wetvr-"+baseFilename+"-2700.mp4")
+ sc.Filenames = append(sc.Filenames, "wetvr-"+baseFilename+"-2048.mp4")
+ sc.Filenames = append(sc.Filenames, "wetvr-"+baseFilename+"-1600.mp4")
+ sc.Filenames = append(sc.Filenames, "wetvr-"+baseFilename+"-960.mp4")
out <- sc
})
- siteCollector.OnHTML(`ul.pagination a.page-link`, func(e *colly.HTMLElement) {
+ siteCollector.OnHTML(`ul a.page-link`, func(e *colly.HTMLElement) { // visits the absolute URL of every matched page link
pageURL := e.Request.AbsoluteURL(e.Attr("href"))
siteCollector.Visit(pageURL)
})
- siteCollector.OnHTML(`div.card`, func(e *colly.HTMLElement) {
+ siteCollector.OnHTML(`div:has(p:contains("Latest")) div[id^="r-"]`, func(e *colly.HTMLElement) {
sceneURL := e.Request.AbsoluteURL(e.ChildAttr("a", "href"))
// If scene exist in database, there's no need to scrape
@@ -104,9 +102,14 @@ func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
// SceneID and release date are only available here on div.card
ctx := colly.NewContext()
- ctx.Put("scene-id", e.Attr("data-video-id"))
- ctx.Put("scene-date", e.Attr("data-date"))
-
+ ctx.Put("scene-id", strings.TrimPrefix(e.Attr("id"), "r-"))
+ // get the date if it exists
+ pDate := e.DOM.Find(`div.video-thumbnail-footer div>span`)
+ if pDate.Length() > 0 {
+ ctx.Put("scene-date", pDate.Text())
+ } else {
+ ctx.Put("scene-date", "")
+ }
sceneCollector.Request("GET", sceneURL, nil, ctx, nil)
}
})
diff --git a/ui/src/views/scenes/Filters.vue b/ui/src/views/scenes/Filters.vue
index 04b3e6538..a6738904b 100644
--- a/ui/src/views/scenes/Filters.vue
+++ b/ui/src/views/scenes/Filters.vue
@@ -56,6 +56,7 @@
+