Commit

chore: -
enenumxela committed Apr 10, 2022
1 parent e1b9089 commit 73618c3
Showing 4 changed files with 75 additions and 71 deletions.
82 changes: 41 additions & 41 deletions cmd/sigrawl3r/main.go
@@ -17,44 +17,44 @@ import (

 var (
 	au       aurora.Aurora
-	conf     configuration.Configuration
+	c        configuration.Configuration
+	noColor  bool
+	silent   bool
 	URL      string
 	URLsFile string
-	silent   bool
-	noColor  bool
 )

 func displayBanner() {
 	fmt.Fprintln(os.Stderr, configuration.BANNER)
 }

 func init() {
-	flag.IntVar(&conf.Concurrency, "concurrency", configuration.DefaultConcurrency, "")
-	flag.IntVar(&conf.Concurrency, "c", configuration.DefaultConcurrency, "")
-	flag.StringVar(&conf.Cookie, "cookie", "", "")
-	flag.BoolVar(&conf.Debug, "debug", false, "")
-	flag.IntVar(&conf.Depth, "depth", configuration.DefaultDepth, "")
-	flag.IntVar(&conf.Depth, "d", configuration.DefaultDepth, "")
-	flag.StringVar(&conf.Headers, "headers", "", "")
-	flag.StringVar(&conf.Headers, "H", "", "")
-	flag.BoolVar(&conf.Headless, "headless", true, "")
-	flag.BoolVar(&conf.IncludeSubdomains, "include-subs", false, "")
+	flag.IntVar(&c.Concurrency, "concurrency", configuration.DefaultConcurrency, "")
+	flag.IntVar(&c.Concurrency, "c", configuration.DefaultConcurrency, "")
+	flag.StringVar(&c.Cookie, "cookie", "", "")
+	flag.BoolVar(&c.Debug, "debug", false, "")
+	flag.IntVar(&c.Depth, "depth", configuration.DefaultDepth, "")
+	flag.IntVar(&c.Depth, "d", configuration.DefaultDepth, "")
+	flag.StringVar(&c.Headers, "headers", "", "")
+	flag.StringVar(&c.Headers, "H", "", "")
+	flag.BoolVar(&c.Headless, "headless", true, "")
+	flag.BoolVar(&c.IncludeSubdomains, "include-subs", false, "")
 	flag.BoolVar(&noColor, "no-color", false, "")
-	flag.StringVar(&conf.Proxy, "proxy", "", "")
-	flag.StringVar(&conf.Proxy, "p", "", "")
-	flag.IntVar(&conf.MaxRandomDelay, "random-delay", configuration.DefaultMaxRandomDelay, "")
-	flag.IntVar(&conf.MaxRandomDelay, "R", configuration.DefaultMaxRandomDelay, "")
-	flag.BoolVar(&conf.Render, "render", false, "")
-	flag.BoolVar(&conf.Render, "r", false, "")
+	flag.StringVar(&c.Proxy, "proxy", "", "")
+	flag.StringVar(&c.Proxy, "p", "", "")
+	flag.IntVar(&c.MaxRandomDelay, "random-delay", configuration.DefaultMaxRandomDelay, "")
+	flag.IntVar(&c.MaxRandomDelay, "R", configuration.DefaultMaxRandomDelay, "")
+	flag.BoolVar(&c.Render, "render", false, "")
+	flag.BoolVar(&c.Render, "r", false, "")
 	flag.BoolVar(&silent, "silent", false, "")
 	flag.BoolVar(&silent, "s", false, "")
-	flag.IntVar(&conf.Threads, "threads", configuration.DefaultThreads, "")
-	flag.IntVar(&conf.Timeout, "timeout", configuration.DefaultTimeout, "")
+	flag.IntVar(&c.Threads, "threads", configuration.DefaultThreads, "")
+	flag.IntVar(&c.Timeout, "timeout", configuration.DefaultTimeout, "")
 	flag.StringVar(&URL, "url", "", "")
 	flag.StringVar(&URL, "u", "", "")
 	flag.StringVar(&URLsFile, "urls", "", "")
 	flag.StringVar(&URLsFile, "U", "", "")
-	flag.StringVar(&conf.UserAgent, "user-agent", "web", "")
+	flag.StringVar(&c.UserAgent, "user-agent", "web", "")

 	flag.Usage = func() {
 		displayBanner()
@@ -70,7 +70,8 @@ func init() {
 		h += "     --headless        If true the browser will be displayed while crawling\n"
 		h += "                           Note: Requires '-r, --render' flag\n"
 		h += "                           Note: Usage to show browser: '--headless=false' (default true)\n"
-		h += " -H, --headers         Custom headers separated by two semi-colons. E.g. -h 'Cookie: foo=bar;;Referer: http://example.com/'\n"
+		h += " -H, --headers         Custom headers separated by two semi-colons.\n"
+		h += "                           E.g. -h 'Cookie: foo=bar;;Referer: http://example.com/'\n"
 		h += "     --include-subs    Extend scope to include subdomains (default: false)\n"
 		h += "     --no-color        Enable no color mode (default: false)\n"
 		h += " -p, --proxy           Proxy URL (e.g: http://127.0.0.1:8080)\n"
@@ -100,7 +101,7 @@ func main() {
 	}

 	// validate configuration
-	if err := conf.Validate(); err != nil {
+	if err := c.Validate(); err != nil {
 		log.Fatalln(err)
 	}

@@ -158,12 +159,10 @@ func main() {
 		}
 	}

-	// process URLs
-	inputURLsChan := make(chan string, conf.Threads)
-
 	wg := new(sync.WaitGroup)
+	inputURLsChan := make(chan string, c.Threads)

-	for i := 0; i < conf.Threads; i++ {
+	for i := 0; i < c.Threads; i++ {
 		wg.Add(1)

 		go func() {
@@ -176,39 +175,39 @@ func main() {
 					continue
 				}

-				URLwg := new(sync.WaitGroup)
+				URLswg := new(sync.WaitGroup)

-				c, err := crawler.New(parsedURL, &conf)
+				c, err := crawler.New(parsedURL, &c)
 				if err != nil {
 					fmt.Fprintln(os.Stderr, err)
 					continue
 				}

-				// parse robots.txt
-				URLwg.Add(1)
+				// crawl
+				URLswg.Add(1)
 				go func() {
-					defer URLwg.Done()
+					defer URLswg.Done()

-					c.ParseRobots()
+					c.Crawl()
 				}()

 				// parse sitemaps
-				URLwg.Add(1)
+				URLswg.Add(1)
 				go func() {
-					defer URLwg.Done()
+					defer URLswg.Done()

 					c.ParseSitemap()
 				}()

-				// crawl
-				URLwg.Add(1)
+				// parse robots.txt
+				URLswg.Add(1)
 				go func() {
-					defer URLwg.Done()
+					defer URLswg.Done()

-					c.Run()
+					c.ParseRobots()
 				}()

-				URLwg.Wait()
+				URLswg.Wait()
 			}
 		}()
 	}
@@ -218,5 +217,6 @@ func main() {
 	}

 	close(inputURLsChan)
+
 	wg.Wait()
 }
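Besides the conf → c rename, the main.go hunks above rename the per-URL WaitGroup (URLwg → URLswg), swap Run() for Crawl(), and reorder the three per-URL goroutines. One subtlety: `c, err := crawler.New(parsedURL, &c)` shadows the configuration variable `c` inside the loop — legal Go, since the right-hand side still sees the outer `c`, but easy to misread. A minimal, self-contained sketch of the fan-out pattern used here (a fixed worker pool over a channel, plus a per-item WaitGroup); the crawl/parseSitemap/parseRobots functions are hypothetical stand-ins, not the repository's API:

	package main

	import (
		"fmt"
		"sync"
	)

	// Placeholder tasks standing in for c.Crawl(), c.ParseSitemap(),
	// and c.ParseRobots(); hypothetical, not part of this commit.
	func crawl(u string)        { fmt.Println("crawl:", u) }
	func parseSitemap(u string) { fmt.Println("sitemap:", u) }
	func parseRobots(u string)  { fmt.Println("robots:", u) }

	func main() {
		threads := 4
		inputURLsChan := make(chan string, threads)

		wg := new(sync.WaitGroup)

		for i := 0; i < threads; i++ {
			wg.Add(1)

			go func() {
				defer wg.Done()

				for u := range inputURLsChan {
					// A per-URL WaitGroup so all three tasks finish
					// before this worker takes the next URL.
					URLswg := new(sync.WaitGroup)

					for _, task := range []func(string){crawl, parseSitemap, parseRobots} {
						task := task // capture loop variable (pre-Go 1.22)

						URLswg.Add(1)
						go func() {
							defer URLswg.Done()

							task(u)
						}()
					}

					URLswg.Wait()
				}
			}()
		}

		for _, u := range []string{"https://example.com", "https://example.org"} {
			inputURLsChan <- u
		}

		close(inputURLsChan)
		wg.Wait()
	}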
2 changes: 1 addition & 1 deletion internal/crawler/crawler.go
@@ -168,7 +168,7 @@ func New(URL *urlx.URL, configuration *configuration.Configuration) (crawler Cra
 	return crawler, nil
 }

-func (crawler *Crawler) Run() (results chan string, err error) {
+func (crawler *Crawler) Crawl() (results chan string, err error) {
 	if crawler.Configuration.Render {
 		// If we're using a proxy send it to the chrome instance
 		browser.GlobalContext, browser.GlobalCancel = browser.GetGlobalContext(crawler.Configuration.Headless, crawler.Configuration.Proxy)
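The crawler.go change is a pure rename: Run becomes Crawl, keeping the (results chan string, err error) signature. Note that main.go above discards both return values; a caller that did want the results might drain the channel roughly like this (a sketch only — it assumes Crawl closes the channel when crawling finishes, which this diff does not show):

	// Hypothetical call site; c is the value returned by crawler.New.
	results, err := c.Crawl()
	if err != nil {
		log.Fatalln(err)
	}

	// Assumes the crawler closes `results` when it is done.
	for result := range results {
		fmt.Println(result)
	}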
49 changes: 26 additions & 23 deletions internal/crawler/robots.go
@@ -11,38 +11,41 @@ import (
 func (crawler *Crawler) ParseRobots() {
 	robotsURL := fmt.Sprintf("%s://%s/robots.txt", crawler.URL.Scheme, crawler.URL.Host)

-	res, err := http.Get(robotsURL)
-	if err != nil {
-		return
-	}
-
-	if res.StatusCode == 200 {
-		if _, exists := foundURLs.Load(robotsURL); !exists {
-			if err := crawler.record(robotsURL); err != nil {
-				return
-			}
-
-			foundURLs.Store(robotsURL, struct{}{})
-		}
-
-		body, err := ioutil.ReadAll(res.Body)
-		if err != nil {
-			return
-		}
-
-		lines := strings.Split(string(body), "\n")
-
-		var re = regexp.MustCompile(".*llow: ")
-
-		for _, line := range lines {
-			if strings.Contains(line, "llow: ") {
-				URL := re.ReplaceAllString(line, "")
-
-				URL = fmt.Sprintf("%s://%s%s", crawler.URL.Scheme, crawler.URL.Host, URL)
-
-				crawler.PCollector.Visit(URL)
+	if _, exists := visitedURLs.Load(robotsURL); !exists {
+		res, err := http.Get(robotsURL)
+		if err != nil {
+			return
+		}
+
+		if res.StatusCode == 200 {
+			if _, exists := foundURLs.Load(robotsURL); !exists {
+				if err := crawler.record(robotsURL); err != nil {
+					return
+				}
+
+				foundURLs.Store(robotsURL, struct{}{})
+			}
+
+			body, err := ioutil.ReadAll(res.Body)
+			if err != nil {
+				return
+			}
+
+			lines := strings.Split(string(body), "\n")
+
+			re := regexp.MustCompile(".*llow: ")
+
+			for _, line := range lines {
+				if strings.Contains(line, "llow: ") {
+					URL := re.ReplaceAllString(line, "")
+
+					URL = fmt.Sprintf("%s://%s%s", crawler.URL.Scheme, crawler.URL.Host, URL)
+
+					crawler.PCollector.Visit(URL)
+				}
 			}
 		}
 	}

+	visitedURLs.Store(robotsURL, struct{}{})
 }
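The `.*llow: ` pattern above is a compact way to match both `Allow: ` and `Disallow: ` directives and strip everything up to the path. A standalone sketch of the same extraction, runnable outside the crawler (the example.com values are placeholders):

	package main

	import (
		"fmt"
		"regexp"
		"strings"
	)

	func main() {
		robots := "User-agent: *\nDisallow: /admin\nAllow: /public\nSitemap: https://example.com/sitemap.xml"

		// Matches "Allow: " and "Disallow: " alike, consuming the directive
		// so only the path remains after replacement.
		re := regexp.MustCompile(".*llow: ")

		for _, line := range strings.Split(robots, "\n") {
			if strings.Contains(line, "llow: ") {
				path := re.ReplaceAllString(line, "")
				fmt.Println("https://example.com" + path) // absolute URL to visit
			}
		}
	}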
13 changes: 7 additions & 6 deletions internal/crawler/sitemap.go
@@ -12,12 +12,13 @@ func (crawler *Crawler) ParseSitemap() {
 	for _, path := range sitemapPaths {
 		sitemapURL := fmt.Sprintf("%s://%s%s", crawler.URL.Scheme, crawler.URL.Host, path)

-		_ = sitemap.ParseFromSite(sitemapURL, func(entry sitemap.Entry) error {
-			URL := entry.GetLocation()
-
-			crawler.PCollector.Visit(URL)
-
-			return nil
-		})
+		if _, exists := visitedURLs.Load(sitemapURL); !exists {
+			_ = sitemap.ParseFromSite(sitemapURL, func(entry sitemap.Entry) error {
+				crawler.PCollector.Visit(entry.GetLocation())
+				return nil
+			})
+		}
+
+		visitedURLs.Store(sitemapURL, struct{}{})
 	}
 }
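robots.go and sitemap.go now share the same guard: skip the work if the URL is already in visitedURLs (a sync.Map, going by the Load/Store calls), then mark it visited afterwards. A separate Load check followed by a later Store leaves a window in which two goroutines can both pass the check; sync.Map's LoadOrStore performs the same test-and-set atomically. A small sketch of that variant (illustrative only, not part of the commit):

	package main

	import (
		"fmt"
		"sync"
	)

	var visitedURLs sync.Map

	// visitOnce reports whether url is being seen for the first time,
	// marking it visited atomically in the same step.
	func visitOnce(url string) bool {
		_, loaded := visitedURLs.LoadOrStore(url, struct{}{})
		return !loaded
	}

	func main() {
		fmt.Println(visitOnce("https://example.com/robots.txt")) // true
		fmt.Println(visitOnce("https://example.com/robots.txt")) // false
	}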
