From 06587e43a26bdc4b50d9fa83d50fe45ef6eb2e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sun, 22 Sep 2019 10:12:51 +0100 Subject: [PATCH] don't allow underscores and asterisks as end chars This way, we match URLs within markdown emphasis like *this* or _this_ without the trailing character. It's highly unlikely that URLs would genuinely want to end in either of those characters. Asterisks are reserved, so they shouldn't be in the path to begin with. And underscores are most common in the middle of the path, and are already commonly used to style plaintext at the start or end of URLs. Still allow them in the middle of URLs, because there's no reason not to. Fixes #29. --- xurls.go | 4 ++-- xurls_test.go | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/xurls.go b/xurls.go index e29a194..7244c70 100644 --- a/xurls.go +++ b/xurls.go @@ -19,9 +19,9 @@ const ( iriChar = letter + mark + number currency = `\p{Sc}` otherSymb = `\p{So}` - endChar = iriChar + `/\-+_&~*%=#` + currency + otherSymb + endChar = iriChar + `/\-+&~%=#` + currency + otherSymb otherPunc = `\p{Po}` - midChar = endChar + otherPunc + midChar = endChar + "_*" + otherPunc wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)` wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]` wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}` diff --git a/xurls_test.go b/xurls_test.go index a99003b..fc17fe7 100644 --- a/xurls_test.go +++ b/xurls_test.go @@ -100,6 +100,10 @@ var constantTestCases = []testCase{ {`.http://foo.com/bar.more`, `http://foo.com/bar.more`}, {`,http://foo.com/bar,`, `http://foo.com/bar`}, {`,http://foo.com/bar,more`, `http://foo.com/bar,more`}, + {`*http://foo.com/bar*`, `http://foo.com/bar`}, + {`*http://foo.com/bar*more`, `http://foo.com/bar*more`}, + {`_http://foo.com/bar_`, `http://foo.com/bar`}, + {`_http://foo.com/bar_more`, `http://foo.com/bar_more`}, 
{`(http://foo.com/bar)`, `http://foo.com/bar`}, {`(http://foo.com/bar)more`, `http://foo.com/bar`}, {`[http://foo.com/bar]`, `http://foo.com/bar`}, @@ -107,16 +111,15 @@ var constantTestCases = []testCase{ {`'http://foo.com/bar'`, `http://foo.com/bar`}, {`'http://foo.com/bar'more`, `http://foo.com/bar'more`}, {`"http://foo.com/bar"`, `http://foo.com/bar`}, + {`"http://foo.com/bar"more`, `http://foo.com/bar"more`}, {`http://a.b/a0/-+_&~*%=#@.,:;'?![]()a`, true}, {`http://a.b/a0/$€¥`, true}, {`http://✪foo.bar/pa✪th©more`, true}, {`http://foo.bar/path/`, true}, {`http://foo.bar/path-`, true}, {`http://foo.bar/path+`, true}, - {`http://foo.bar/path_`, true}, {`http://foo.bar/path&`, true}, {`http://foo.bar/path~`, true}, - {`http://foo.bar/path*`, true}, {`http://foo.bar/path%`, true}, {`http://foo.bar/path=`, true}, {`http://foo.bar/path#`, true},