Skip to content

Commit

Permalink
scraper: fix VRSpy trailers (xbapps#1535)
Browse files (browse the repository at this point in the history)
  • Loading branch information
toshski authored Dec 30, 2023
1 parent bbb630a commit b890929
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 16 deletions.
13 changes: 8 additions & 5 deletions pkg/api/trailers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"html"
"io"
"net/http"
"net/url"
"path"
"regexp"
"strings"

Expand Down Expand Up @@ -78,11 +80,12 @@ func ScrapeHtml(scrapeParams string) models.VideoSourceResponse {
} else {
// extract match with regex expression if one was specified
re := regexp.MustCompile(params.ExtractRegex)
r := re.FindStringSubmatch(e.Text)
if len(r) > 0 {
if r[1] != "" {
srcs = append(srcs, models.VideoSource{URL: r[1], Quality: "unknown"})
}
results := re.FindAllStringSubmatch(e.Text, -1)
for _, result := range results {
parsedURL, _ := url.Parse(result[0])
filename := path.Base(parsedURL.Path)
baseFilename := strings.TrimSuffix(filename, path.Ext(filename))
srcs = append(srcs, models.VideoSource{URL: result[1], Quality: baseFilename})
}
}
})
Expand Down
16 changes: 5 additions & 11 deletions pkg/scrape/vrspy.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,11 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `/photos/[^?"]*\.jpg`)
sc.Gallery = imageRegex.FindAllString(nuxtData, -1)

sc.TrailerType = "urls"
params := models.VideoSourceResponse{}
trailersRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `/trailers/([^?"]*)\.mp4`)
for _, trailer := range trailersRegex.FindAllStringSubmatch(nuxtData, -1) {
params.VideoSources = append(params.VideoSources, models.VideoSource{
URL: trailer[0],
Quality: trailer[1],
})
}
strParams, _ := json.Marshal(params)
sc.TrailerSrc = string(strParams)
// trailer details
sc.TrailerType = "scrape_html"
paramsdata := models.TrailerScrape{SceneUrl: sc.HomepageURL, HtmlElement: "script[id=\"__NUXT_DATA__\"]", ExtractRegex: `(https:\/\/cdn.vrspy.com\/videos\/\d*\/trailers\/\dk\.mp4\?token.*?)"`}
jsonStr, _ := json.Marshal(paramsdata)
sc.TrailerSrc = string(jsonStr)

out <- sc
})
Expand Down

0 comments on commit b890929

Please sign in to comment.