Skip to content

Commit

Permalink
scraper: fix CzechVR scraper (xbapps#1524)
Browse files (browse the repository at this point in the history)
Co-authored-by: crwxaj <crwxaj>
  • Loading branch information
crwxaj authored Nov 28, 2023
1 parent 05028f7 commit 2ce3c02
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions pkg/scrape/czechvr.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -105,11 +105,13 @@ func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan
}

// trailer details
sc.TrailerType = "heresphere"
// extract internal id with (\d+)
var re = regexp.MustCompile(`(?m)https:\/\/www.czechvrnetwork.com\/detail-(\d+)`)
r := re.FindStringSubmatch(sc.HomepageURL)
sc.TrailerSrc = "https://www.czechvrnetwork.com/heresphere/videoID" + r[1]
if len(r) > 0 {
sc.TrailerType = "heresphere"
sc.TrailerSrc = "https://www.czechvrnetwork.com/heresphere/videoID" + r[1]
}

// Filenames
e.ForEach(`div.post div.download a.trailer`, func(id int, e *colly.HTMLElement) {
Expand Down Expand Up @@ -155,7 +157,7 @@ func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan

siteCollector.OnHTML(`div.postTag`, func(e *colly.HTMLElement) {
sceneURL := ""
e.ForEach(`div.foto a`, func(id int, e *colly.HTMLElement) {
e.ForEach(`div.navez h2 a`, func(id int, e *colly.HTMLElement) {
sceneURL = e.Request.AbsoluteURL(e.Attr("href"))
// If scene exist in database, there's no need to scrape
if !funk.ContainsString(knownScenes, sceneURL) {
Expand Down

0 comments on commit 2ce3c02

Please sign in to comment.