Skip to content

Commit

Permalink
scraper: fix SLR trans scene cover images (xbapps#1641)
Browse files Browse the repository at this point in the history
* Update slrstudios.go

* Incorporates the changes from xbapps#1624 as a new fallback for failure to retrieve the cover image using the API
* Fixes trans covers, they are broken at the moment for the same reason that necessitated xbapps#1624 but required a different fix

* Update slrstudios.go

Forgot to import `net/url`, gofmt

* URL decoding was unnecessary. String replacement.

* Unrelated: (temporary) gitpod environment fix for achrinza/node-ipc#59
  • Loading branch information
vt-idiot authored Apr 2, 2024
1 parent a7fe03f commit 39975a2
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .gitpod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ image:
file: .gitpod.dockerfile
tasks:
- name: Continuous Build
command: yarn global add concurrently && go install github.com/cosmtrek/air@latest && cd /workspace/xbvr && go generate && go get && yarn && yarn dev
command: yarn config set ignore-engines true && yarn global add concurrently && go install github.com/cosmtrek/air@latest && cd /workspace/xbvr && go generate && go get && yarn && yarn dev
ports:
- port: 9999
onOpen: open-preview
Expand Down
39 changes: 20 additions & 19 deletions pkg/scrape/slrstudios.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
commonDb, _ := models.GetCommonDB()

// RegEx Patterns
coverRegEx := regexp.MustCompile(`background(?:-image)?\s*?:\s*?url\s*?\(\s*?(.*?)\s*?\)`)
durationRegExForSceneCard := regexp.MustCompile(`^(?:(\d{2}):)?(\d{2}):(\d{2})$`)
durationRegExForScenePage := regexp.MustCompile(`^T(\d{0,2})H?(\d{2})M(\d{2})S$`)
filenameRegEx := regexp.MustCompile(`[:?]|( & )|( \\u0026 )`)
Expand All @@ -48,7 +47,7 @@ func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
sc.Site = siteID
sc.MasterSiteId = masterSiteId
if scraperID == "" {
// there maybe no site/studio if user is jusy scraping a scene url
// there may be no site/studio if the user is just scraping a scene url
e.ForEach(`div[data-qa="page-scene-studio-name"]`, func(id int, e *colly.HTMLElement) {
sc.Studio = strings.TrimSpace(e.Text)
sc.Site = strings.TrimSpace(e.Text)
Expand Down Expand Up @@ -153,26 +152,28 @@ func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
if len(coverURL) > 0 {
sc.Covers = append(sc.Covers, coverURL)
} else {
coverURL := e.ChildAttr(`.splash-screen > img`, "src")
if len(coverURL) > 0 {
sc.Covers = append(sc.Covers, coverURL)
} else {
m := coverRegEx.FindStringSubmatch(strings.TrimSpace(e.ChildAttr(`.c-webxr-splash-screen`, "style")))
if len(m) > 0 && len(m[1]) > 0 {
sc.Covers = append(sc.Covers, m[1])
}
}
e.ForEach(`link[as="image"]`, func(id int, e *colly.HTMLElement) {
sc.Covers = append(sc.Covers, e.Request.AbsoluteURL(e.Attr("href")))
})
}
} else {
tcoverURL := e.ChildAttr(`.splash-screen > img`, "src")
if len(tcoverURL) > 0 {
sc.Covers = append(sc.Covers, tcoverURL)
} else {
m := coverRegEx.FindStringSubmatch(strings.TrimSpace(e.ChildAttr(`.c-webxr-splash-screen`, "style")))
if len(m) > 0 && len(m[1]) > 0 {
sc.Covers = append(sc.Covers, m[1])
posterURLFound := false
e.ForEach(`script[type="text/javascript"]`, func(id int, e *colly.HTMLElement) {
if posterURLFound {
return
}
}
scriptContent := e.Text
if strings.Contains(scriptContent, "posterURL") {
startIndex := strings.Index(scriptContent, `"posterURL":"`) + len(`"posterURL":"`)
endIndex := strings.Index(scriptContent[startIndex:], `"`)
if startIndex >= 0 && endIndex >= 0 {
posterURL := scriptContent[startIndex : startIndex+endIndex]
unescapedURL := strings.ReplaceAll(posterURL, `\`, "")
sc.Covers = append(sc.Covers, unescapedURL)
posterURLFound = true
}
}
})
}

// straight and trans videos use a different page structure
Expand Down

0 comments on commit 39975a2

Please sign in to comment.