Skip to content

Commit

Permalink
decode unicode chars
Browse files Browse the repository at this point in the history
  • Loading branch information
dogancanbakir committed Aug 17, 2023
1 parent c1b8b57 commit 25f7bd6
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
15 changes: 14 additions & 1 deletion url/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package urlutil
import (
"bytes"
"net/url"
"regexp"
"strconv"
"strings"

errorutil "github.com/projectdiscovery/utils/errors"
Expand Down Expand Up @@ -222,9 +224,19 @@ func (u *URL) fetchParams() {

// Parse first rewrites any %uXXXX escapes in inputURL via
// decodeUnicodeEscapes and then delegates to ParseURL with the unsafe
// flag set to false.
func Parse(inputURL string) (*URL, error) {
	return ParseURL(decodeUnicodeEscapes(inputURL), false)
}

// unicodeEscapeRe matches a non-standard %uXXXX escape: the literal "%u"
// (lowercase u only) followed by exactly four hexadecimal digits. It is
// compiled once at package scope so decodeUnicodeEscapes does not pay the
// compilation cost on every call.
var unicodeEscapeRe = regexp.MustCompile(`%u([0-9a-fA-F]{4})`)

// decodeUnicodeEscapes replaces every %uXXXX escape in inputURL with the
// character whose code point is the hexadecimal value XXXX and returns the
// resulting string. Text that does not match the pattern (for example
// ordinary %XX escapes, "%U0041", or a truncated "%u00") is left untouched.
//
// Each escape is converted independently via string(rune(code)), so a value
// that is not a valid Unicode code point on its own (such as a UTF-16
// surrogate half) is rendered as U+FFFD.
func decodeUnicodeEscapes(inputURL string) string {
	return unicodeEscapeRe.ReplaceAllStringFunc(inputURL, func(match string) string {
		// match is always "%u" plus four hex digits; skip the 2-byte prefix.
		code, err := strconv.ParseUint(match[2:], 16, 16)
		if err != nil {
			// Unreachable given the pattern above, but keep the original
			// text instead of silently emitting a NUL character.
			return match
		}
		return string(rune(code))
	})
}

// ParseURL parses inputURL and returns the resulting URL.
func ParseURL(inputURL string, unsafe bool) (*URL, error) {
u := &URL{
Expand Down Expand Up @@ -291,7 +303,8 @@ func ParseURL(inputURL string, unsafe bool) (*URL, error) {
// TODO: should use a proper regex to validate hostname/ip
// currently domain names without (.) are not considered as valid and autocorrected
// if DisableAutoCorrect is false
if !strings.Contains(u.Host, ".") && !strings.Contains(u.Host, ":") && u.Host != "localhost" {
if !stringsutil.HasPrefixAny(inputURL, HTTP+SchemeSeparator, HTTPS+SchemeSeparator, "//") &&
!strings.Contains(u.Host, ".") && !strings.Contains(u.Host, ":") && u.Host != "localhost" {
// this does not look like a valid domain, ipv4 or ipv6
// consider it as relative
if !DisableAutoCorrect {
Expand Down
15 changes: 15 additions & 0 deletions url/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,18 @@ func TestParseRelativePath(t *testing.T) {
require.Equal(t, v.expectedPath, urlx.GetRelativePath())
}
}

// TestUnicodeEscape verifies that Parse accepts URLs containing %uXXXX
// escapes and that the parsed URL's String() output contains the decoded
// characters in their place.
func TestUnicodeEscape(t *testing.T) {
	type escapeCase struct {
		rawURL string
		want   string
	}
	cases := []escapeCase{
		{rawURL: "https://admin/%u002e%u002e/%u002e%u002e/1.txt.it", want: "https://admin/../../1.txt.it"},
	}

	for i := range cases {
		tc := cases[i]
		parsed, err := Parse(tc.rawURL)
		require.Nilf(t, err, "got error for url %v", tc.rawURL)
		require.Equal(t, tc.want, parsed.String())
	}
}

0 comments on commit 25f7bd6

Please sign in to comment.