From 3ff9286e9d85ffde1c0c0770721083d5b91f8323 Mon Sep 17 00:00:00 2001 From: Abhinav Kaushlya Date: Sat, 4 Jan 2020 23:10:58 +0530 Subject: [PATCH] Add option: `IgnoreExternalBrokenLinks` This adds a new option to produce a warning rather than an error for broken external links. It is quite useful for sites having hundreds of external links. --- README.md | 1 + htmltest/check-generic.go | 6 +++- htmltest/check-link.go | 12 +++++--- htmltest/check-link_test.go | 57 +++++++++++++++++++++++++++++++++++++ htmltest/options.go | 2 ++ 5 files changed, 73 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b87a933..606365e 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,7 @@ htmltest uses a YAML configuration file. Put `.htmltest.yml` in the same directo | `IgnoreInternalEmptyHash` | When true prevents raising an error for links with `href="#"`. | `false` | | `IgnoreEmptyHref` | When true prevents raising an error for links with `href=""`. | `false` | | `IgnoreCanonicalBrokenLinks` | When true produces a warning, rather than an error, for broken canonical links. When testing a site which isn't live yet or before publishing a new page canonical links will fail. | `true` | +| `IgnoreExternalBrokenLinks` | When true produces a warning, rather than an error, for broken external links. Useful when testing a site having hundreds of external links. | `false` | | `IgnoreAltMissing` | Turns off image alt attribute checking. | `false` | | `IgnoreDirectoryMissingTrailingSlash` | Turns off errors for links to directories without a trailing slash. | `false` | | `IgnoreSSLVerify` | Turns off x509 errors for self-signed certificates. 
| `false` | diff --git a/htmltest/check-generic.go b/htmltest/check-generic.go index 9c46609..c2e32d6 100644 --- a/htmltest/check-generic.go +++ b/htmltest/check-generic.go @@ -57,10 +57,14 @@ func (hT *HTMLTest) enforceHTTPS(ref *htmldoc.Reference) { if hT.opts.isURLIgnored(ref.URLString()) { return } + issueLevel := issues.LevelError + if hT.opts.IgnoreExternalBrokenLinks { + issueLevel = issues.LevelWarning + } if hT.opts.EnforceHTTPS { hT.issueStore.AddIssue(issues.Issue{ - Level: issues.LevelError, + Level: issueLevel, Message: "is not an HTTPS target", Reference: ref, }) diff --git a/htmltest/check-link.go b/htmltest/check-link.go index 1ce03ca..f28b96f 100644 --- a/htmltest/check-link.go +++ b/htmltest/check-link.go @@ -111,6 +111,10 @@ func (hT *HTMLTest) checkLink(document *htmldoc.Document, node *html.Node) { } func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) { + issueLevel := issues.LevelError + if hT.opts.IgnoreExternalBrokenLinks { + issueLevel = issues.LevelWarning + } if !hT.opts.CheckExternal { hT.issueStore.AddIssue(issues.Issue{ Level: issues.LevelDebug, @@ -184,7 +188,7 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) { cleanedMessage := strings.TrimPrefix(err.Error(), prefix) // Add error hT.issueStore.AddIssue(issues.Issue{ - Level: issues.LevelError, + Level: issueLevel, Message: cleanedMessage, Reference: ref, }) @@ -192,7 +196,7 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) { } if strings.Contains(err.Error(), "Client.Timeout") { hT.issueStore.AddIssue(issues.Issue{ - Level: issues.LevelError, + Level: issueLevel, Message: "request exceeded our ExternalTimeout", Reference: ref, }) @@ -213,7 +217,7 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) { // Unhandled client error, return generic error hT.issueStore.AddIssue(issues.Issue{ - Level: issues.LevelError, + Level: issueLevel, Message: err.Error(), Reference: ref, }) @@ -249,7 +253,7 @@ func (hT *HTMLTest) checkExternal(ref 
*htmldoc.Reference) { } else { // Failed VCRed requests end up here with a status code of zero hT.issueStore.AddIssue(issues.Issue{ - Level: issues.LevelError, + Level: issueLevel, Message: fmt.Sprintf("%s %d", "Non-OK status:", statusCode), Reference: ref, }) diff --git a/htmltest/check-link_test.go b/htmltest/check-link_test.go index d0474d3..d440772 100644 --- a/htmltest/check-link_test.go +++ b/htmltest/check-link_test.go @@ -42,6 +42,13 @@ func TestAnchorExternalBroken(t *testing.T) { tExpectIssueCount(t, hT, 1) } +func TestAnchorExternalBrokenOption(t *testing.T) { + // passes for broken external links when asked + hT := tTestFileOpts("fixtures/links/brokenLinkExternal.html", + map[string]interface{}{"IgnoreExternalBrokenLinks": true, "VCREnable": true}) + tExpectIssueCount(t, hT, 0) +} + func TestAnchorExternalBrokenNoVCR(t *testing.T) { // fails for broken external links without VCR. This is needed as the code that handles 'dial tcp' errors doesn't // get called with VCR. It returns a rather empty response with status code of 0. @@ -50,6 +57,14 @@ func TestAnchorExternalBrokenNoVCR(t *testing.T) { tExpectIssueCount(t, hT, 1) } +func TestAnchorExternalBrokenOptionNoVCR(t *testing.T) { + // passes for broken external links without VCR when asked. 
+ tSkipShortExternal(t) + hT := tTestFileOpts("fixtures/links/brokenLinkExternal.html", + map[string]interface{}{"IgnoreExternalBrokenLinks": true}) + tExpectIssueCount(t, hT, 0) +} + func TestAnchorExternalIgnore(t *testing.T) { // ignores external links when asked hT := tTestFileOpts("fixtures/links/brokenLinkExternal.html", @@ -103,6 +118,13 @@ func TestAnchorExternalInsecureOption(t *testing.T) { tExpectIssue(t, hT, "is not an HTTPS target", 1) } +func TestAnchorExternalBrokenOptionInsecure(t *testing.T) { + // passes for non-HTTPS links when asked + hT := tTestFileOpts("fixtures/links/non_https.html", + map[string]interface{}{"EnforceHTTPS": true, "IgnoreExternalBrokenLinks": true, "VCREnable": true}) + tExpectIssueCount(t, hT, 0) +} + func TestAnchorExternalInsecureOptionIgnored(t *testing.T) { // passes when checking for non-HTTPS links but they're in the IgnoreURLs list hT := tTestFileOpts("fixtures/links/issues/94.html", @@ -120,6 +142,13 @@ func TestAnchorExternalHrefIP(t *testing.T) { tExpectIssueCount(t, hT, 2) } +func TestAnchorExternalBrokenOptionHrefIP(t *testing.T) { + // passes for broken IP address links when asked + hT := tTestFileOpts("fixtures/links/ip_href.html", + map[string]interface{}{"VCREnable": true, "IgnoreExternalBrokenLinks": true}) + tExpectIssueCount(t, hT, 0) +} + func TestAnchorExternalHrefIPTimeout(t *testing.T) { // fails for broken IP address links hT := tTestFileOpts("fixtures/links/ip_timeout.html", @@ -128,6 +157,13 @@ func TestAnchorExternalHrefIPTimeout(t *testing.T) { tExpectIssue(t, hT, "request exceeded our ExternalTimeout", 1) } +func TestAnchorExternalBrokenOptionHrefIPTimeout(t *testing.T) { + // passes for broken IP address links when asked + hT := tTestFileOpts("fixtures/links/ip_timeout.html", + map[string]interface{}{"IgnoreExternalBrokenLinks": true, "ExternalTimeout": 1}) + tExpectIssueCount(t, hT, 0) +} + func TestAnchorExternalFollowRedirects(t *testing.T) { // should follow redirects hT :=
tTestFileOpts("fixtures/links/linkWithRedirect.html", @@ -158,6 +194,13 @@ func TestAnchorExternalHTTPSInvalid(t *testing.T) { tExpectIssueCount(t, hT, 6) } +func TestAnchorExternalBrokenOptionHTTPSInvalid(t *testing.T) { + // should pass for invalid https when asked + hT := tTestFileOpts("fixtures/links/https-invalid.html", + map[string]interface{}{"IgnoreExternalBrokenLinks": true, "VCREnable": true}) + tExpectIssueCount(t, hT, 0) +} + func TestAnchorExternalHTTPSMissingChain(t *testing.T) { // should support https aia // see issue #130 @@ -166,6 +209,13 @@ func TestAnchorExternalHTTPSMissingChain(t *testing.T) { tExpectIssue(t, hT, "incomplete certificate chain", 1) } +func TestAnchorExternalBrokenOptionHTTPSMissingChain(t *testing.T) { + // should pass for incomplete chains when asked + hT := tTestFileOpts("fixtures/links/https-incomplete-chain.html", + map[string]interface{}{"IgnoreExternalBrokenLinks": true, "VCREnable": false}) + tExpectIssueCount(t, hT, 0) +} + func TestAnchorExternalHTTPSBadH2(t *testing.T) { // should connect to servers with bad http/2 support // See issue #49 @@ -197,6 +247,13 @@ func TestAnchorExternalMissingProtocolInvalid(t *testing.T) { // tExpectIssue(t, hT, "no such host", 1) } +func TestAnchorExternalBrokenOptionMissingProtocol(t *testing.T) { + // passes for invalid links missing the protocol when asked + hT := tTestFileOpts("fixtures/links/link_missing_protocol_invalid.html", + map[string]interface{}{"IgnoreExternalBrokenLinks": true, "VCREnable": true}) + tExpectIssueCount(t, hT, 0) +} + func TestLinkExternalHrefPipes(t *testing.T) { // works for pipes in the URL hT := tTestFileOpts("fixtures/links/escape_pipes.html", diff --git a/htmltest/options.go b/htmltest/options.go index 7c2dfe9..a38c880 100644 --- a/htmltest/options.go +++ b/htmltest/options.go @@ -43,6 +43,7 @@ type Options struct { IgnoreInternalEmptyHash bool IgnoreEmptyHref bool IgnoreCanonicalBrokenLinks bool + IgnoreExternalBrokenLinks bool IgnoreAltMissing bool 
IgnoreDirectoryMissingTrailingSlash bool IgnoreSSLVerify bool @@ -106,6 +107,7 @@ func DefaultOptions() map[string]interface{} { "IgnoreInternalEmptyHash": false, "IgnoreEmptyHref": false, "IgnoreCanonicalBrokenLinks": true, + "IgnoreExternalBrokenLinks": false, "IgnoreAltMissing": false, "IgnoreDirectoryMissingTrailingSlash": false, "IgnoreSSLVerify": false,