Skip to content

Commit

Permalink
chore: -
Browse files (browse the repository at this point in the history)
  • Loading branch information
enenumxela committed Apr 17, 2022
1 parent fd57d2f commit 9019c34
Showing 1 changed file with 20 additions and 14 deletions.
34 changes: 20 additions & 14 deletions internal/crawler/crawler.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -211,26 +211,30 @@ func (crawler *Crawler) Crawl() (results chan string, err error) {
URL := strings.TrimRight(response.Request.URL.String(), "/")

if _, exists := foundURLs.Load(URL); !exists {
if err := crawler.record(URL); err != nil {
return
}
return
}

foundURLs.Store(URL, struct{}{})
if err := crawler.record(URL); err != nil {
return
}

foundURLs.Store(URL, struct{}{})
})

crawler.PageCollector.OnHTML("*[href]", func(e *colly.HTMLElement) {
relativeURL := e.Attr("href")
absoluteURL := e.Request.AbsoluteURL(relativeURL)

if _, exists := foundURLs.Load(absoluteURL); !exists {
if err := crawler.record(absoluteURL); err != nil {
return
}
if _, exists := foundURLs.Load(absoluteURL); exists {
return
}

foundURLs.Store(absoluteURL, struct{}{})
if err := crawler.record(absoluteURL); err != nil {
return
}

foundURLs.Store(absoluteURL, struct{}{})

if _, exists := visitedURLs.Load(absoluteURL); !exists {
e.Request.Visit(relativeURL)
}
Expand All @@ -240,14 +244,16 @@ func (crawler *Crawler) Crawl() (results chan string, err error) {
relativeURL := e.Attr("src")
absoluteURL := e.Request.AbsoluteURL(relativeURL)

if _, exists := foundURLs.Load(absoluteURL); !exists {
if err := crawler.record(absoluteURL); err != nil {
return
}
if _, exists := foundURLs.Load(absoluteURL); exists {
return
}

foundURLs.Store(absoluteURL, struct{}{})
if err := crawler.record(absoluteURL); err != nil {
return
}

foundURLs.Store(absoluteURL, struct{}{})

if _, exists := visitedURLs.Load(absoluteURL); !exists {
e.Request.Visit(relativeURL)
}
Expand Down

0 comments on commit 9019c34

Please sign in to comment.