From 61da17b64daba19e85b7bba4d9ba737aa4a178dd Mon Sep 17 00:00:00 2001 From: Jens Willemsens <6514515+JenswBE@users.noreply.github.com> Date: Tue, 12 Nov 2024 20:08:42 +0100 Subject: [PATCH] Add global ignored links --- README.md | 6 ++++- cmd/config/config.go | 46 +++++++++++++++++++++++++++------------ e2e/config.yml | 3 +++ e2e/expected.jsonc | 4 ++-- e2e/sites/main/tag_a.html | 1 + internal/check/checker.go | 7 ++++-- internal/manager.go | 2 +- 7 files changed, 49 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index a4500af..d7b7dac 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,14 @@ Dead link checker written in Golang using [Colly](https://github.com/gocolly/col sites: - url: https://jensw.be ignored_links: # Optional, list of regex's which should be ignored - - ^https://jensw.be/don't-visit-me.* + - ^https://jensw.be/do-not-visit-me.* notify: # Optional, send notification to these notifiers by name - email_technical_en +# Optional, globally ignored links (see sites.ignored_links) +ignored_links: + - ^https://jensw.be/also-not-visit-me.* + # Optional, can also be set as environment variable VERBOSE. # Default is False. verbose: False diff --git a/cmd/config/config.go b/cmd/config/config.go index c853dc8..6578ef7 100644 --- a/cmd/config/config.go +++ b/cmd/config/config.go @@ -14,11 +14,12 @@ import ( ) type RawConfig struct { - Verbose bool - Cron string - HealthCheck RawHealthCheck `mapstructure:"health_check"` - Notifiers []RawNotifier `mapstructure:"notifiers"` - Sites []RawSiteConfig + Verbose bool + Cron string + HealthCheck RawHealthCheck `mapstructure:"health_check"` + Notifiers []RawNotifier `mapstructure:"notifiers"` + IgnoredLinks []string `mapstructure:"ignored_links"` + Sites []RawSiteConfig } type RawHealthCheck struct { @@ -39,9 +40,10 @@ type RawSiteConfig struct { type Config struct { RawConfig - HealthCheck HealthCheck - Notifiers map[string]NotifierConfig - Sites []SiteConfig + HealthCheck HealthCheck + Notifiers map[string]NotifierConfig + IgnoredLinks []*regexp.Regexp + Sites []SiteConfig } type HealthCheck struct { @@ -119,6 +121,13 @@ func ParseConfig(configPath string) (*Config, error) { TemplateName: rawNotifier.TemplateName, } } + + // Parse global ignored links + config.IgnoredLinks, err = parseIgnoredLinks(rawConfig.IgnoredLinks, "global") + if err != nil { + return nil, err + } + config.Sites = make([]SiteConfig, 0, len(rawConfig.Sites)) for _, rawSite := range rawConfig.Sites { // Create initial SiteConfig @@ -135,12 +144,9 @@ func ParseConfig(configPath string) (*Config, error) { } // Parse IgnoredLinks - for _, ignoredLink := range rawSite.IgnoredLinks { - ignoredLinkRegex, err := regexp.Compile(ignoredLink) - if err != nil { - return nil, fmt.Errorf("failed to parse ignored link '%s' for site '%s': %w", ignoredLink, site, err) - } - site.IgnoredLinks = append(site.IgnoredLinks, ignoredLinkRegex) + site.IgnoredLinks, err = parseIgnoredLinks(rawSite.IgnoredLinks, site.URL.String()) + if err != nil { + return nil, err } // Validate Notify @@ -170,3 +176,15 @@ func bindEnvs(bindings []envBinding) error { } return nil } + +func parseIgnoredLinks(links []string, site string) ([]*regexp.Regexp, error) { + parsedLinks := make([]*regexp.Regexp, len(links)) + for i, link := range links { + linkRegex, err := regexp.Compile(link) + if err != nil { + return nil, fmt.Errorf("failed to parse ignored link '%s' for site '%s': %w", link, site, err) + } + parsedLinks[i] = linkRegex + } + return parsedLinks, nil +} diff --git a/e2e/config.yml b/e2e/config.yml index b0fa827..282cdd6 100644 --- a/e2e/config.yml +++ b/e2e/config.yml @@ -13,6 +13,9 @@ notifiers: # Optional, see https://github.com/containrrr/shoutrrr/blob/main/docs url: smtp://smpt4dev:smpt4dev@localhost:8025/?from=delic@localhost&to=user@localhost&usehtml=true&subject=Defecte%20links%20gevonden template_name: "simple_nl" # Currently only "technical_en" and "simple_nl" supported +ignored_links: + - ^http://localhost:9083 + sites: - url: http://localhost:9080 ignored_links: diff --git a/e2e/expected.jsonc b/e2e/expected.jsonc index 62223db..b9f843e 100644 --- a/e2e/expected.jsonc +++ b/e2e/expected.jsonc @@ -3,10 +3,10 @@ { "http://localhost:9080": { "Statistics": { - "LinksCountTotal": 51, + "LinksCountTotal": 52, "LinksCountByPageURL": { "http://localhost:9080/": 5, - "http://localhost:9080/tag_a.html": 12, + "http://localhost:9080/tag_a.html": 13, "http://localhost:9080/tag_img.html": 26, "http://localhost:9080/tag_link.html": 2, "http://localhost:9080/tag_picture_source.html": 4, diff --git a/e2e/sites/main/tag_a.html b/e2e/sites/main/tag_a.html index 5c29e27..2f1fdf5 100644 --- a/e2e/sites/main/tag_a.html +++ b/e2e/sites/main/tag_a.html @@ -26,6 +26,7 @@
+