Skip to content

Commit

Permalink
Merge pull request #115 from JenswBE/add-global-ignored-links
Browse files Browse the repository at this point in the history
Add global ignored links
  • Loading branch information
JenswBE authored Nov 12, 2024
2 parents e214369 + 61da17b commit 490df6f
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 20 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ Dead link checker written in Golang using [Colly](https://github.com/gocolly/col
sites:
- url: https://jensw.be
ignored_links: # Optional, list of regexes which should be ignored
- ^https://jensw.be/don't-visit-me.*
- ^https://jensw.be/do-not-visit-me.*
notify: # Optional, send notification to these notifiers by name
- email_technical_en

# Optional, globally ignored links (see sites.ignored_links)
ignored_links:
- ^https://jensw.be/also-not-visit-me.*

# Optional, can also be set as environment variable VERBOSE.
# Default is False.
verbose: False
Expand Down
46 changes: 32 additions & 14 deletions cmd/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ import (
)

type RawConfig struct {
Verbose bool
Cron string
HealthCheck RawHealthCheck `mapstructure:"health_check"`
Notifiers []RawNotifier `mapstructure:"notifiers"`
Sites []RawSiteConfig
Verbose bool
Cron string
HealthCheck RawHealthCheck `mapstructure:"health_check"`
Notifiers []RawNotifier `mapstructure:"notifiers"`
IgnoredLinks []string `mapstructure:"ignored_links"`
Sites []RawSiteConfig
}

type RawHealthCheck struct {
Expand All @@ -39,9 +40,10 @@ type RawSiteConfig struct {

type Config struct {
RawConfig
HealthCheck HealthCheck
Notifiers map[string]NotifierConfig
Sites []SiteConfig
HealthCheck HealthCheck
Notifiers map[string]NotifierConfig
IgnoredLinks []*regexp.Regexp
Sites []SiteConfig
}

type HealthCheck struct {
Expand Down Expand Up @@ -119,6 +121,13 @@ func ParseConfig(configPath string) (*Config, error) {
TemplateName: rawNotifier.TemplateName,
}
}

// Parse global ignored links
config.IgnoredLinks, err = parseIgnoredLinks(rawConfig.IgnoredLinks, "global")
if err != nil {
return nil, err
}

config.Sites = make([]SiteConfig, 0, len(rawConfig.Sites))
for _, rawSite := range rawConfig.Sites {
// Create initial SiteConfig
Expand All @@ -135,12 +144,9 @@ func ParseConfig(configPath string) (*Config, error) {
}

// Parse IgnoredLinks
for _, ignoredLink := range rawSite.IgnoredLinks {
ignoredLinkRegex, err := regexp.Compile(ignoredLink)
if err != nil {
return nil, fmt.Errorf("failed to parse ignored link '%s' for site '%s': %w", ignoredLink, site, err)
}
site.IgnoredLinks = append(site.IgnoredLinks, ignoredLinkRegex)
site.IgnoredLinks, err = parseIgnoredLinks(rawSite.IgnoredLinks, site.URL.String())
if err != nil {
return nil, err
}

// Validate Notify
Expand Down Expand Up @@ -170,3 +176,15 @@ func bindEnvs(bindings []envBinding) error {
}
return nil
}

// parseIgnoredLinks compiles every pattern in links into a regular expression,
// keeping the results in the same order as the input. The site argument is used
// only to label the error message. Compilation stops at the first invalid
// pattern, in which case a nil slice and a wrapped error are returned.
func parseIgnoredLinks(links []string, site string) ([]*regexp.Regexp, error) {
	compiled := make([]*regexp.Regexp, 0, len(links))
	for _, pattern := range links {
		re, err := regexp.Compile(pattern)
		if err != nil {
			return nil, fmt.Errorf("failed to parse ignored link '%s' for site '%s': %w", pattern, site, err)
		}
		compiled = append(compiled, re)
	}
	return compiled, nil
}
3 changes: 3 additions & 0 deletions e2e/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ notifiers: # Optional, see https://github.com/containrrr/shoutrrr/blob/main/docs
url: smtp://smpt4dev:smpt4dev@localhost:8025/?from=delic@localhost&to=user@localhost&usehtml=true&subject=Defecte%20links%20gevonden
template_name: "simple_nl" # Currently only "technical_en" and "simple_nl" supported

ignored_links:
- ^http://localhost:9083

sites:
- url: http://localhost:9080
ignored_links:
Expand Down
4 changes: 2 additions & 2 deletions e2e/expected.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
{
"http://localhost:9080": {
"Statistics": {
"LinksCountTotal": 51,
"LinksCountTotal": 52,
"LinksCountByPageURL": {
"http://localhost:9080/": 5,
"http://localhost:9080/tag_a.html": 12,
"http://localhost:9080/tag_a.html": 13,
"http://localhost:9080/tag_img.html": 26,
"http://localhost:9080/tag_link.html": 2,
"http://localhost:9080/tag_picture_source.html": 4,
Expand Down
1 change: 1 addition & 0 deletions e2e/sites/main/tag_a.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
<p><a href="mailto:test@example.com">Skip ignored schemes</a></p>

<p><a href="http://localhost:9082/some-page">Skip ignored URL's</a></p>
<p><a href="http://localhost:9083/some-page">Skip global ignored URL's</a></p>
</p>
</body>
</html>
7 changes: 5 additions & 2 deletions internal/check/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"net/http"
"regexp"
"slices"
"strings"
"time"
Expand Down Expand Up @@ -42,11 +43,13 @@ var tags = map[string]tagConfig{
}

// Run checks the provided site. This call blocks until the whole site is checked.
func Run(siteConfig config.SiteConfig, recorder *record.Recorder) error {
func Run(siteConfig config.SiteConfig, globalIgnoredLinks []*regexp.Regexp, recorder *record.Recorder) error {
// Create collector
ignoredLinks := siteConfig.IgnoredLinks
ignoredLinks = append(ignoredLinks, globalIgnoredLinks...)
collector := colly.NewCollector(
colly.Async(true),
colly.DisallowedURLFilters(siteConfig.IgnoredLinks...),
colly.DisallowedURLFilters(ignoredLinks...),
colly.IgnoreRobotsTxt(),
extensions.RandomUserAgent,
)
Expand Down
2 changes: 1 addition & 1 deletion internal/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func (m *Manager) Run(ctx context.Context, c *config.Config) map[string]report.R
go func(siteConfig config.SiteConfig) {
defer wg.Done()
recorder := record.NewRecorder()
if err := check.Run(siteConfig, recorder); err != nil {
if err := check.Run(siteConfig, c.IgnoredLinks, recorder); err != nil {
log.Error().Err(err).Str("site_url", siteConfig.URL.String()).
Msg("Failed to run checker. Will mark as broken link.")
recorder.RecordBrokenLink(record.BrokenLink{
Expand Down

0 comments on commit 490df6f

Please sign in to comment.