From 25f7bd61afbc2d7236c9c1b42a1eebe1f276df70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Do=C4=9Fan=20Can=20Bak=C4=B1r?=
Date: Thu, 17 Aug 2023 13:31:19 +0000
Subject: [PATCH] decode unicode chars

---
Reviewer note (not part of the commit message): the line breaks and gofmt tab
indentation of this patch were reconstructed, so confirm that the context lines
match the target tree before applying. The "index" blob hashes are carried over
from the original submission and do not describe the revised file contents.

 url/url.go      | 24 +++++++++++++++++++++++-
 url/url_test.go | 22 ++++++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/url/url.go b/url/url.go
index a7d48a5..4d867f9 100644
--- a/url/url.go
+++ b/url/url.go
@@ -3,6 +3,8 @@ package urlutil
 import (
 	"bytes"
 	"net/url"
+	"regexp"
+	"strconv"
 	"strings"
 
 	errorutil "github.com/projectdiscovery/utils/errors"
@@ -222,9 +224,28 @@ func (u *URL) fetchParams() {
 
 // ParseURL
 func Parse(inputURL string) (*URL, error) {
+	inputURL = decodeUnicodeEscapes(inputURL)
 	return ParseURL(inputURL, false)
 }
 
+// unicodeEscapeRe matches the non-standard %uXXXX escape (exactly four hex digits).
+// It is compiled once at package scope rather than on every Parse call.
+var unicodeEscapeRe = regexp.MustCompile(`%u([0-9a-fA-F]{4})`)
+
+// decodeUnicodeEscapes replaces %uXXXX escapes with their actual characters.
+// Escapes naming a UTF-16 surrogate half (U+D800..U+DFFF) are left as-is: a lone
+// surrogate is not a valid rune and would otherwise silently become U+FFFD.
+func decodeUnicodeEscapes(inputURL string) string {
+	return unicodeEscapeRe.ReplaceAllStringFunc(inputURL, func(match string) string {
+		// match is "%u" plus four hex digits, so the value always fits in 16 bits.
+		code, err := strconv.ParseUint(match[2:], 16, 16)
+		if err != nil || (code >= 0xD800 && code <= 0xDFFF) {
+			return match
+		}
+		return string(rune(code))
+	})
+}
+
 // Parse and return URL
 func ParseURL(inputURL string, unsafe bool) (*URL, error) {
 	u := &URL{
@@ -291,7 +312,8 @@ func ParseURL(inputURL string, unsafe bool) (*URL, error) {
 	// TODO: should use a proper regex to validate hostname/ip
 	// currently domain names without (.) are not considered as valid and autocorrected
 	// if DisableAutoCorrect is false
-	if !strings.Contains(u.Host, ".") && !strings.Contains(u.Host, ":") && u.Host != "localhost" {
+	if !stringsutil.HasPrefixAny(inputURL, HTTP+SchemeSeparator, HTTPS+SchemeSeparator, "//") &&
+		!strings.Contains(u.Host, ".") && !strings.Contains(u.Host, ":") && u.Host != "localhost" {
 		// this does not look like a valid domain , ipv4 or ipv6
 		// consider it as relative
 		if !DisableAutoCorrect {
diff --git a/url/url_test.go b/url/url_test.go
index 9dfdd81..e128bf3 100644
--- a/url/url_test.go
+++ b/url/url_test.go
@@ -117,3 +117,25 @@ func TestParseRelativePath(t *testing.T) {
 		require.Equal(t, v.expectedPath, urlx.GetRelativePath())
 	}
 }
+
+func TestUnicodeEscape(t *testing.T) {
+	testcases := []struct {
+		input    string
+		expected string
+	}{
+		{"https://admin/%u002e%u002e/%u002e%u002e/1.txt.it", "https://admin/../../1.txt.it"},
+	}
+
+	for _, v := range testcases {
+		urlx, err := Parse(v.input)
+		require.Nilf(t, err, "got error for url %v", v.input)
+		require.Equal(t, v.expected, urlx.String())
+	}
+}
+
+func TestDecodeUnicodeEscapes(t *testing.T) {
+	// %uXXXX escapes are decoded; ordinary percent-escapes pass through unchanged.
+	require.Equal(t, "/../a%2e", decodeUnicodeEscapes("/%u002e%u002E/a%2e"))
+	// A lone UTF-16 surrogate cannot be represented as a rune, so it stays escaped.
+	require.Equal(t, "/%ud800", decodeUnicodeEscapes("/%ud800"))
+}