diff --git a/canonicalizer/canonicalizer.go b/canonicalizer/canonicalizer.go index f727600..b2258dc 100644 --- a/canonicalizer/canonicalizer.go +++ b/canonicalizer/canonicalizer.go @@ -49,7 +49,7 @@ type profile struct { func (p *profile) Parse(rawUrl string) (*url.Url, error) { u, err := p.Parser.Parse(rawUrl) if err != nil { - if errors.Code(err) == errors.FailRelativeUrlWithNoBase && p.defaultScheme != "" { + if errors.Type(err) == errors.MissingSchemeNonRelativeURL && p.defaultScheme != "" { rawUrl = p.defaultScheme + "://" + rawUrl u, err = p.Parser.Parse(rawUrl) } @@ -64,7 +64,7 @@ func (p *profile) Parse(rawUrl string) (*url.Url, error) { func (p *profile) ParseRef(rawUrl, ref string) (*url.Url, error) { b, err := p.Parser.Parse(rawUrl) if err != nil { - if errors.Code(err) == errors.FailRelativeUrlWithNoBase && p.defaultScheme != "" { + if errors.Type(err) == errors.MissingSchemeNonRelativeURL && p.defaultScheme != "" { rawUrl = p.defaultScheme + "://" + rawUrl b, err = p.Parser.Parse(rawUrl) } diff --git a/errors/codes.go b/errors/codes.go index ea7c5a7..0fa37d8 100644 --- a/errors/codes.go +++ b/errors/codes.go @@ -16,65 +16,45 @@ package errors -import ( - "fmt" -) - -// ErrorCode is data type of error codes for different kind of errors -type ErrorCode int32 +type ErrorType string -// Validation errors +// IDNA errors const ( - IllegalCodePoint ErrorCode = iota + 100 - InvalidPercentEncoding - IllegalLeadingOrTrailingChar - IllegalTabOrNewline - AtInAuthority - IllegalSlashes - IllegalLocalFileAndHostCombo - BadWindowsDriveLetter - IllegalIPv4Address - IllegalIPv6Address - CouldNotDecodeHost + DomainToASCII ErrorType = "Unicode ToASCII records an error or returns the empty string" + DomainToUnicode ErrorType = "Unicode ToUnicode records an error" ) -// Validation failures +// Host parsing errors const ( - FailIllegalCodePoint ErrorCode = iota + 500 - FailIllegalScheme - FailRelativeUrlWithNoBase - FailMissingHost - FailIllegalHost - FailIllegalPort + DomainInvalidCodePoint ErrorType = "The host contains a forbidden domain code point" + HostInvalidCodePoint ErrorType = "An opaque host (in a URL that is not special) contains a forbidden host code point" + IPv4EmptyPart ErrorType = "An IPv4 address ends with a U+002E (.)" + IPv4TooManyParts ErrorType = "An IPv4 address has more than four parts" + IPv4NonNumericPart ErrorType = "An IPv4 address contains a non-numeric part" + IPv4NonDecimalPart ErrorType = "The IPv4 address contains numbers expressed using hexadecimal or octal digits" + IPv4OutOfRangePart ErrorType = "An IPv4 address contains a part that is greater than 255" + IPv6Unclosed ErrorType = "An IPv6 address is missing the closing U+005D (])" + IPv6InvalidCompression ErrorType = "An IPv6 address begins with improper compression" + IPv6TooManyPieces ErrorType = "An IPv6 address has more than eight pieces" + IPv6MultipleCompression ErrorType = "An IPv6 address contains multiple instances of '::'" + IPv6InvalidCodePoint ErrorType = "An IPv6 address contains a code point that is neither an ASCII hex digit nor a U+003A (:). Or it unexpectedly ends" + IPv6TooFewPieces ErrorType = "An uncompressed IPv6 address contains fewer than 8 pieces" + IPv4InIPv6TooManyPieces ErrorType = "An IPv4 address is found in an IPv6 address, but the IPv6 address has more than 6 pieces" + IPv4InIPv6InvalidCodePoint ErrorType = "An IPv4 address is found in an IPv6 address and one of the following is true: 1. An IPv4 part is empty or contains a non-ASCII digit. 2. An IPv4 part contains a leading 0. 3. There are too many IPv4 parts" + IPv4InIPv6OutOfRangePart ErrorType = "An IPv4 address is found in an IPv6 address and one of the IPv4 parts is greater than 255" + IPv4InIPv6TooFewParts ErrorType = "An IPv4 address is found in an IPv6 address and there are too few IPv4 parts" ) -func (e ErrorCode) String() string { - return fmt.Sprintf("%d: %s", e, messages[e]) -} - -func (e ErrorCode) Int32() int32 { - return int32(e) -} - -var messages = map[ErrorCode]string{ - // Validation errors - IllegalCodePoint: "illegal code point", - InvalidPercentEncoding: "invalid percent encoding", - IllegalLeadingOrTrailingChar: "illegal leading or trailing character", - IllegalTabOrNewline: "illegal tab or newline", - AtInAuthority: "'@' in authority", - IllegalSlashes: "illegal combination of slashes", - IllegalLocalFileAndHostCombo: "illegal combination of host and local file reference", - BadWindowsDriveLetter: "badly formatted windows drive letter", - IllegalIPv4Address: "illegal IPv4 address", - IllegalIPv6Address: "illegal IPv6 address", - CouldNotDecodeHost: "could not decode host", - - // Validation failures - FailIllegalCodePoint: "illegal code point", - FailIllegalScheme: "illegal scheme", - FailRelativeUrlWithNoBase: "relative url with missing or invalid base url", - FailMissingHost: "missing host", - FailIllegalHost: "illegal host", - FailIllegalPort: "illegal port", -} +// URL parsing errors +const ( + InvalidURLUnit ErrorType = "A code point is found that is not a URL unit" + SpecialSchemeMissingFollowingSolidus ErrorType = "The input’s scheme is not followed by '//'" + MissingSchemeNonRelativeURL ErrorType = "The input is missing a scheme, because it does not begin with an ASCII alpha, and either no base URL was provided or the base URL cannot be used as a base URL because it has an opaque path" + InvalidReverseSolidus ErrorType = "The URL has a special scheme and it uses U+005C (\\) instead of U+002F (/)" + InvalidCredentials ErrorType = "The input includes credentials" + HostMissing ErrorType = "The input has a special scheme, but does not contain a host" + PortOutOfRange ErrorType = "The input's port is outside the range [0-65535]" + PortInvalid ErrorType = "The input's port is not a number" + FileInvalidWindowsDriveLetter ErrorType = "The input is a relative-URL string that starts with a Windows drive letter and the base URL’s scheme is 'file'" + FileInvalidWindowsDriveLetterHost ErrorType = "A file: URL’s host is a Windows drive letter" +) diff --git a/errors/errors.go b/errors/errors.go index c388823..8fc4241 100644 --- a/errors/errors.go +++ b/errors/errors.go @@ -20,21 +20,22 @@ import ( "fmt" ) -// UrlError is the struct of url error -type UrlError struct { - code ErrorCode - descr string - url string - cause error // the root cause for this error +// ValidationError indicates that the url is not valid +type ValidationError struct { + errorType ErrorType + cause error // the root cause for this error + descr string // description of the error + failure bool // true if the error is a failure, false if it is a warning + url string } -func (e *UrlError) Error() string { - errMsg := fmt.Sprintf("Error: %s", e.code) +func (e *ValidationError) Error() string { + errMsg := fmt.Sprintf("Error: %s", e.errorType) if e.descr != "" { - errMsg += fmt.Sprintf(" '%s'", e.descr) + errMsg += fmt.Sprintf(": '%s'", e.descr) } if e.url != "" { - errMsg += fmt.Sprintf(", Url: %s", e.url) + errMsg += fmt.Sprintf(". Url: '%s'", e.url) } if nil == e.cause { return errMsg @@ -43,29 +44,42 @@ func (e *UrlError) Error() string { return errMsg + ", Cause: " + e.cause.Error() } -func (e *UrlError) Unwrap() error { +// Unwrap returns the root cause for this error +func (e *ValidationError) Unwrap() error { return e.cause } -func (e *UrlError) Code() ErrorCode { - return e.code +// Type returns the error type +func (e *ValidationError) Type() ErrorType { + return e.errorType } -func (e *UrlError) Url() string { +// Url returns the url causing the error +func (e *ValidationError) Url() string { return e.url } -// Code returns the error code -func Code(err error) ErrorCode { - type coder interface { - Code() ErrorCode +// Failure returns true if the error is a failure, false if it is a warning +func (e *ValidationError) Failure() bool { + return e.failure +} + +// Description returns the error description +func (e *ValidationError) Description() string { + return e.descr +} + +// Type returns the error type +func Type(err error) ErrorType { + type typer interface { + Type() ErrorType } - cd, ok := err.(coder) + cd, ok := err.(typer) if !ok { - return 0 + return "" } - return cd.Code() + return cd.Type() } // Description returns the error description @@ -94,38 +108,56 @@ func Url(err error) string { return m.Url() } +// Failure returns true if the error is a failure, false if it is a warning. +// If the error does not implement the Failure() method, true is returned +func Failure(err error) bool { + type failure interface { + Failure() bool + } + + m, ok := err.(failure) + if !ok { + return true + } + return m.Failure() +} + // Error constructs a new error -func Error(code ErrorCode, url string) error { - return &UrlError{ - code: code, - url: url, +func Error(errorType ErrorType, url string, failure bool) error { + return &ValidationError{ + errorType: errorType, + url: url, + failure: failure, } } // ErrorWithDescr constructs a new error -func ErrorWithDescr(code ErrorCode, descr string, url string) error { - return &UrlError{ - code: code, - descr: descr, - url: url, +func ErrorWithDescr(errorType ErrorType, descr string, url string, failure bool) error { + return &ValidationError{ + errorType: errorType, + descr: descr, + url: url, + failure: failure, } } // Wrap wraps an error with an error code and url -func Wrap(err error, code ErrorCode, url string) error { - return &UrlError{ - code: code, - url: url, - cause: err, +func Wrap(err error, errorType ErrorType, url string, failure bool) error { + return &ValidationError{ + errorType: errorType, + url: url, + cause: err, + failure: failure, } } // WrapWithDescr wraps an error with an error code, url and a description -func WrapWithDescr(err error, code ErrorCode, descr string, url string) error { - return &UrlError{ - code: code, - descr: descr, - url: url, - cause: err, +func WrapWithDescr(err error, errorType ErrorType, descr string, url string, failure bool) error { + return &ValidationError{ + errorType: errorType, + descr: descr, + url: url, + cause: err, + failure: failure, } } diff --git a/errors/errors_test.go b/errors/errors_test.go index c0eae45..44b2cef 100644 --- a/errors/errors_test.go +++ b/errors/errors_test.go @@ -22,22 +22,19 @@ import ( ) func TestError(t *testing.T) { - type args struct { - code ErrorCode - url string - } tests := []struct { - name string - code ErrorCode - url string + name string + errorType ErrorType + url string + failure bool }{ - {"1", IllegalLeadingOrTrailingChar, "http://example.com\t"}, + {"1", HostInvalidCodePoint, "http://example.com\t", false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := Error(tt.code, tt.url) + err := Error(tt.errorType, tt.url, false) fmt.Printf("Error: %s\n", err) - fmt.Printf("Code: %s\n", Code(err)) + fmt.Printf("Type: %s\n", Type(err)) fmt.Printf("Url: %s\n", Url(err)) //if err := Error(tt.args.code, tt.args.url); (err != nil) != tt.wantErr { // t.Errorf("Error() error = %v, wantErr %v", err, tt.wantErr) diff --git a/url/errorhandler.go b/url/errorhandler.go index 2d782ce..80b1dbb 100644 --- a/url/errorhandler.go +++ b/url/errorhandler.go @@ -20,16 +20,50 @@ import ( "github.com/nlnwa/whatwg-url/errors" ) -func (p *parser) handleError(u *Url, code errors.ErrorCode) error { +// handleError handles an error according to the options set for the parser +func (p *parser) handleError(u *Url, errorType errors.ErrorType, failure bool) error { + e := errors.Error(errorType, u.inputUrl, failure) if p.opts.reportValidationErrors { - u.validationErrors = append(u.validationErrors, errors.Error(code, u.inputUrl)) + u.validationErrors = append(u.validationErrors, e) } - if p.opts.failOnValidationError { - return errors.Error(code, u.inputUrl) + if failure || p.opts.failOnValidationError { + return e } return nil } -func (p *parser) handleFailure(u *Url, code errors.ErrorCode, err error) (*Url, error) { - return nil, errors.Wrap(err, code, u.inputUrl) +// handleErrorWithDescription handles an error according to the options set for the parser +func (p *parser) handleErrorWithDescription(u *Url, errorType errors.ErrorType, failure bool, descr string) error { + e := errors.ErrorWithDescr(errorType, descr, u.inputUrl, failure) + if p.opts.reportValidationErrors { + u.validationErrors = append(u.validationErrors, e) + } + if failure || p.opts.failOnValidationError { + return e + } + return nil +} + +// handleWrappedError handles an error according to the options set for the parser +func (p *parser) handleWrappedError(u *Url, errorType errors.ErrorType, failure bool, cause error) error { + e := errors.Wrap(cause, errorType, u.inputUrl, failure) + if p.opts.reportValidationErrors { + u.validationErrors = append(u.validationErrors, e) + } + if failure || p.opts.failOnValidationError { + return e + } + return nil +} + +// handleWrappedErrorWithDescription handles an error according to the options set for the parser +func (p *parser) handleWrappedErrorWithDescription(u *Url, errorType errors.ErrorType, failure bool, cause error, descr string) error { + e := errors.WrapWithDescr(cause, errorType, descr, u.inputUrl, failure) + if p.opts.reportValidationErrors { + u.validationErrors = append(u.validationErrors, e) + } + if failure || p.opts.failOnValidationError { + return e + } + return nil } diff --git a/url/hostparser.go b/url/hostparser.go index 3ffa48d..288dc18 100644 --- a/url/hostparser.go +++ b/url/hostparser.go @@ -20,7 +20,6 @@ import ( goerrors "errors" "fmt" "math" - "net/url" "strconv" "strings" "unicode/utf8" @@ -40,28 +39,26 @@ func (p *parser) parseHost(u *Url, parser *parser, input string, isNotSpecial bo } if input[0] == '[' { if !strings.HasSuffix(input, "]") { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv6Unclosed, true); err != nil { + return "", err + } } input = strings.Trim(input, "[]") return p.parseIPv6(u, newInputString(input)) } if isNotSpecial { - return p.parseOpaqueHost(input) + return p.parseOpaqueHost(u, input) } - domain, err := url.PathUnescape(input) - if err != nil { - if p.opts.laxHostParsing { - return input, nil - } - return "", errors.Wrap(err, errors.CouldNotDecodeHost, "") - } + domain := p.DecodePercentEncoded(input) if !utf8.ValidString(domain) { if p.opts.laxHostParsing { return percentEncodeString(input, HostPercentEncodeSet), nil } - return "", errors.ErrorWithDescr(errors.CouldNotDecodeHost, "not a valid UTF-8 string", "") + if err := p.handleErrorWithDescription(u, errors.DomainToASCII, true, "not a valid UTF-8 string"); err != nil { + return "", err + } } asciiDomain, err := p.ToASCII(domain) @@ -69,19 +66,23 @@ func (p *parser) parseHost(u *Url, parser *parser, input string, isNotSpecial bo if p.opts.laxHostParsing { return domain, nil } - return "", errors.Wrap(err, errors.CouldNotDecodeHost, "") + if err := p.handleWrappedError(u, errors.DomainToASCII, true, err); err != nil { + return "", err + } } for _, c := range asciiDomain { if ForbiddenDomainCodePoint.Test(uint(c)) { if p.opts.laxHostParsing { return parser.PercentEncodeString(asciiDomain, HostPercentEncodeSet), nil } else { - return "", errors.ErrorWithDescr(errors.IllegalCodePoint, string(c), "") + if err := p.handleErrorWithDescription(u, errors.DomainInvalidCodePoint, true, string(c)); err != nil { + return "", err + } } } } - if p.endsInANumber(asciiDomain) { + if p.endsInANumber(u, asciiDomain) { ipv4Host, err := p.parseIPv4(u, asciiDomain) return ipv4Host, err } @@ -92,7 +93,7 @@ func (p *parser) parseHost(u *Url, parser *parser, input string, isNotSpecial bo return asciiDomain, nil } -func (p *parser) endsInANumber(input string) bool { +func (p *parser) endsInANumber(u *Url, input string) bool { parts := strings.Split(input, ".") if parts[len(parts)-1] == "" { if len(parts) == 1 { @@ -104,16 +105,17 @@ func (p *parser) endsInANumber(input string) bool { if last != "" && containsOnly(last, ASCIIDigit) { return true } - if _, _, err := p.parseIPv4Number(last); err == nil || goerrors.Is(err, strconv.ErrRange) { + if _, _, err := p.parseIPv4Number(u, last); err == nil || goerrors.Is(err, strconv.ErrRange) { return true } return false } -func (p *parser) parseIPv4Number(input string) (number int64, validationError bool, err error) { +func (p *parser) parseIPv4Number(u *Url, input string) (number int64, validationError bool, err error) { if input == "" { - err = errors.ErrorWithDescr(errors.CouldNotDecodeHost, "empty IPv4 number", "") - return + if err = p.handleError(u, errors.IPv4EmptyPart, true); err != nil { + return + } } R := 10 if len(input) >= 2 && (strings.HasPrefix(input, "0x") || strings.HasPrefix(input, "0X")) { @@ -136,7 +138,7 @@ func (p *parser) parseIPv4Number(input string) (number int64, validationError bo func (p *parser) parseIPv4(u *Url, input string) (string, error) { parts := strings.Split(input, ".") if parts[len(parts)-1] == "" { - if err := p.handleError(u, errors.IllegalIPv4Address); err != nil { + if err := p.handleError(u, errors.IPv4EmptyPart, false); err != nil { return input, err } if len(parts) > 1 { @@ -144,17 +146,20 @@ func (p *parser) parseIPv4(u *Url, input string) (string, error) { } } if len(parts) > 4 { - _, err := p.handleFailure(u, errors.IllegalIPv4Address, fmt.Errorf("IPv4 too many parts")) - return "", err + if err := p.handleError(u, errors.IPv4TooManyParts, true); err != nil { + return input, err + } } var numbers []int64 for _, part := range parts { - n, validationError, err := p.parseIPv4Number(part) + n, validationError, err := p.parseIPv4Number(u, part) if err != nil { - return input, err + if err := p.handleWrappedError(u, errors.IPv4NonNumericPart, true, err); err != nil { + return input, err + } } if validationError { - if err := p.handleError(u, errors.IllegalIPv4Address); err != nil { + if err := p.handleError(u, errors.IPv4NonDecimalPart, false); err != nil { return input, err } } @@ -162,18 +167,22 @@ func (p *parser) parseIPv4(u *Url, input string) (string, error) { } for _, n := range numbers { if n > 255 { - if err := p.handleError(u, errors.IllegalIPv4Address); err != nil { + if err := p.handleError(u, errors.IPv4OutOfRangePart, false); err != nil { return input, err } } } for _, n := range numbers[:len(numbers)-1] { if n > 255 { - return "", errors.Error(errors.IllegalIPv4Address, "") + if err := p.handleError(u, errors.IPv4OutOfRangePart, true); err != nil { + return "", err + } } } if numbers[len(numbers)-1] >= int64(math.Pow(256, float64(5-len(numbers)))) { - return "", errors.Error(errors.IllegalIPv4Address, "") + if err := p.handleError(u, errors.IPv4OutOfRangePart, true); err != nil { + return "", err + } } var ipv4 = IPv4Addr(numbers[len(numbers)-1]) numbers = numbers[:len(numbers)-1] @@ -193,7 +202,9 @@ func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) { c := input.nextCodePoint() if c == ':' { if !input.remainingStartsWith(":") { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv6InvalidCompression, true); err != nil { + return "", err + } } input.nextCodePoint() c = input.nextCodePoint() @@ -202,11 +213,15 @@ func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) { } for !input.eof { if pieceIdx == 8 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv6TooManyPieces, true); err != nil { + return "", err + } } if c == ':' { if compress >= 0 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv6MultipleCompression, true); err != nil { + return "", err + } } c = input.nextCodePoint() pieceIdx++ @@ -225,12 +240,16 @@ func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) { if c == '.' { if length == 0 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil { + return "", err + } } input.rewind(length + 1) c = input.nextCodePoint() if pieceIdx > 6 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv4InIPv6TooManyPieces, true); err != nil { + return "", err + } } numbersSeen := 0 for !input.eof { @@ -239,24 +258,32 @@ func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) { if c == '.' && numbersSeen < 4 { c = input.nextCodePoint() } else { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil { + return "", err + } } } if !ASCIIDigit.Test(uint(c)) { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil { + return "", err + } } for ASCIIDigit.Test(uint(c)) { number, _ := strconv.Atoi(string(c)) if ipv4Piece < 0 { ipv4Piece = number } else if ipv4Piece == 0 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv4InIPv6InvalidCodePoint, true); err != nil { + return "", err + } } else { ipv4Piece = ipv4Piece*10 + number } if ipv4Piece > 255 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv4InIPv6OutOfRangePart, true); err != nil { + return "", err + } } c = input.nextCodePoint() } @@ -267,16 +294,22 @@ func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) { } } if numbersSeen != 4 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv4InIPv6TooFewParts, true); err != nil { + return "", err + } } break } else if c == ':' { c = input.nextCodePoint() if input.eof { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv6InvalidCodePoint, true); err != nil { + return "", err + } } } else if !input.eof { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv6InvalidCodePoint, true); err != nil { + return "", err + } } address[pieceIdx] = uint16(value) pieceIdx++ @@ -292,22 +325,40 @@ func (p *parser) parseIPv6(u *Url, input *inputString) (string, error) { swaps-- } } else if compress < 0 && pieceIdx != 8 { - return "", errors.Error(errors.IllegalIPv6Address, "") + if err := p.handleError(u, errors.IPv6TooFewPieces, true); err != nil { + return "", err + } } u.isIPv6 = true return "[" + address.String() + "]", nil } -func (p *parser) parseOpaqueHost(input string) (string, error) { +func (p *parser) parseOpaqueHost(u *Url, input string) (string, error) { output := "" for _, c := range input { - if ForbiddenHostCodePoint.Test(uint(c)) && c != '%' { + if ForbiddenHostCodePoint.Test(uint(c)) { if p.opts.laxHostParsing { return input, nil } else { - return "", errors.ErrorWithDescr(errors.IllegalCodePoint, string(c), "") + if err := p.handleErrorWithDescription(u, errors.HostInvalidCodePoint, true, string(c)); err != nil { + return "", err + } + } + } + if !isURLCodePoint(c) && c != '%' { + if err := p.handleErrorWithDescription(u, errors.InvalidURLUnit, false, string(c)); err != nil { + return input, err } } + if c == '%' { + invalidPercentEncoding, d := remainingIsInvalidPercentEncoded([]rune(input)) + if invalidPercentEncoding { + if err := p.handleErrorWithDescription(u, errors.InvalidURLUnit, false, d); err != nil { + return input, err + } + } + } + output += p.percentEncodeRune(c, C0PercentEncodeSet) } return output, nil diff --git a/url/inputstring.go b/url/inputstring.go index 135ad30..46892c1 100644 --- a/url/inputstring.go +++ b/url/inputstring.go @@ -93,13 +93,24 @@ func (i *inputString) remainingStartsWith(s string) bool { return strings.HasPrefix(string(i.runes[i.pointer+1:]), s) } -func (i *inputString) remainingIsInvalidPercentEncoded() bool { - if !i.eof && i.runes[i.pointer] == '%' && - (len(i.runes) < (i.pointer+3) || - (!ASCIIHexDigit.Test(uint(i.runes[i.pointer+1])) || !ASCIIHexDigit.Test(uint(i.runes[i.pointer+2])))) { - return true +// remainingIsInvalidPercentEncoded returns true if the first three characters in the rune array are not '%' followed by two hex digits. +// If true, the second return value is the invalid percent encoded string. +func (i *inputString) remainingIsInvalidPercentEncoded() (bool, string) { + return remainingIsInvalidPercentEncoded(i.runes[i.pointer:]) +} + +// remainingIsInvalidPercentEncoded returns true if the first three characters in the rune array are not '%' followed by two hex digits. +// If true, the second return value is the invalid percent encoded string. +func remainingIsInvalidPercentEncoded(runes []rune) (bool, string) { + if len(runes) >= 1 && runes[0] == '%' && + (len(runes) < 3 || (!ASCIIHexDigit.Test(uint(runes[1])) || !ASCIIHexDigit.Test(uint(runes[2])))) { + l := 3 + if len(runes) < 3 { + l = len(runes) + } + return true, string(runes[0:l]) } - return false + return false, "" } func (i *inputString) String() string { diff --git a/url/parser.go b/url/parser.go index 88216dc..352eca9 100644 --- a/url/parser.go +++ b/url/parser.go @@ -17,6 +17,7 @@ package url import ( + goerrors "errors" u2 "net/url" "strconv" "strings" @@ -108,7 +109,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride if url == nil { url = &Url{inputUrl: urlOrRef, path: &path{}} if i, changed := trim(url.inputUrl, C0OrSpacePercentEncodeSet); changed { - if err := p.handleError(url, errors.IllegalLeadingOrTrailingChar); err != nil { + if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil { return nil, err } url.inputUrl = i @@ -119,7 +120,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride url.parser = p if i, changed := remove(url.inputUrl, ASCIITabOrNewline); changed { - if err := p.handleError(url, errors.IllegalTabOrNewline); err != nil { + if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil { return nil, err } url.inputUrl = i @@ -150,7 +151,9 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride state = stateNoScheme input.rewindLast() } else { - return p.handleFailure(url, errors.FailIllegalCodePoint, nil) + if err := p.handleError(url, errors.InvalidURLUnit, true); err != nil { + return nil, err + } } case stateScheme: tr := ASCIIAlphanumeric.Clone().Set(0x2b).Set(0x2d).Set(0x2e) @@ -183,7 +186,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride buffer.Reset() if url.protocol == "file" { if !input.remainingStartsWith("//") { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil { return nil, err } } @@ -204,11 +207,15 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride state = stateNoScheme input.reset() } else { - return p.handleFailure(url, errors.FailIllegalScheme, nil) + if err := p.handleError(url, errors.InvalidURLUnit, true); err != nil { + return nil, err + } } case stateNoScheme: if base == nil || (base.path.isOpaque() && r != '#') { - return p.handleFailure(url, errors.FailRelativeUrlWithNoBase, nil) + if err := p.handleError(url, errors.MissingSchemeNonRelativeURL, true); err != nil { + return nil, err + } } else if base != nil && base.path.isOpaque() && r == '#' { url.protocol = base.protocol url.path = base.path // TODO: Ensure copy???? @@ -227,7 +234,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride state = stateSpecialAuthorityIgnoreSlashes input.nextCodePoint() } else { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil { return nil, err } state = stateRelative @@ -245,7 +252,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride if r == '/' { state = stateRelativeSlash } else if url.isSpecialSchemeAndBackslash(r) { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil { return nil, err } state = stateRelativeSlash @@ -273,7 +280,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride case stateRelativeSlash: if url.IsSpecialScheme() && (r == '/' || r == '\\') { if r == '\\' { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil { return nil, err } } @@ -293,7 +300,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride state = stateSpecialAuthorityIgnoreSlashes input.nextCodePoint() } else { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil { return nil, err } state = stateSpecialAuthorityIgnoreSlashes @@ -304,13 +311,13 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride state = stateAuthority input.rewindLast() } else { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.SpecialSchemeMissingFollowingSolidus, false); err != nil { return nil, err } } case stateAuthority: if r == '@' { - if err := p.handleError(url, errors.AtInAuthority); err != nil { + if err := p.handleError(url, errors.InvalidCredentials, false); err != nil { return nil, err } if atFlag { @@ -340,7 +347,9 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride buffer.Reset() } else if (input.eof || r == '/' || r == '?' || r == '#') || url.isSpecialSchemeAndBackslash(r) { if atFlag && buffer.Len() == 0 { - return p.handleFailure(url, errors.FailMissingHost, nil) + if err := p.handleError(url, errors.InvalidCredentials, true); err != nil { + return nil, err + } } input.rewind(len([]rune(buffer.String())) + 1) buffer.Reset() @@ -356,14 +365,16 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride state = stateFileHost } else if r == ':' && !bracketFlag { if buffer.Len() == 0 { - return p.handleFailure(url, errors.FailMissingHost, nil) + if err := p.handleError(url, errors.HostMissing, true); err != nil { + return nil, err + } } if stateOverride == stateHostname { return url, nil } host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme()) if err != nil { - return p.handleFailure(url, errors.FailIllegalHost, err) + return url, err } url.host = &host buffer.Reset() @@ -371,13 +382,15 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride } else if (input.eof || r == '/' || r == '?' || r == '#') || url.isSpecialSchemeAndBackslash(r) { input.rewindLast() if url.IsSpecialScheme() && buffer.Len() == 0 { - return p.handleFailure(url, errors.FailMissingHost, nil) + if err := p.handleError(url, errors.HostMissing, true); err != nil { + return nil, err + } } else if stateOverridden && buffer.Len() == 0 && (url.username != "" || url.password != "" || url.port != nil) { return url, nil } else { host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme()) if err != nil { - return p.handleFailure(url, errors.FailIllegalHost, err) + return url, err } url.host = &host buffer.Reset() @@ -404,11 +417,10 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride } else if (input.eof || r == '/' || r == '?' || r == '#') || url.isSpecialSchemeAndBackslash(r) || stateOverridden { if buffer.Len() > 0 { port, err := strconv.Atoi(buffer.String()) - if err != nil { - return p.handleFailure(url, errors.FailIllegalPort, nil) - } - if port > 65535 { - return p.handleFailure(url, errors.FailIllegalPort, nil) + if port > 65535 || goerrors.Is(err, strconv.ErrRange) { + if err := p.handleWrappedError(url, errors.PortOutOfRange, true, err); err != nil { + return nil, err + } } portString := strconv.Itoa(port) url.decodedPort = port @@ -422,14 +434,16 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride state = statePathStart input.rewindLast() } else { - return p.handleFailure(url, errors.FailIllegalPort, nil) + if err := p.handleError(url, errors.PortInvalid, true); err != nil { + return nil, err + } } case stateFile: url.protocol = "file" url.host = new(string) if r == '/' || r == '\\' { if r == '\\' { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil { return nil, err } } @@ -449,7 +463,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride if !startsWithAWindowsDriveLetter(input.remainingFromPointer()) { url.path.shortenPath(url.protocol) } else { - if err := p.handleError(url, errors.BadWindowsDriveLetter); err != nil { + if err := p.handleError(url, errors.FileInvalidWindowsDriveLetter, false); err != nil { return nil, err } url.path.init() @@ -464,7 +478,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride case stateFileSlash: if r == '/' || r == '\\' { if r == '\\' { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil { return nil, err } } @@ -484,7 +498,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride if input.eof || r == '/' || r == '\\' || r == '?' || r == '#' { input.rewindLast() if !stateOverridden && isWindowsDriveLetter(buffer.String()) { - if err := p.handleError(url, errors.BadWindowsDriveLetter); err != nil { + if err := p.handleError(url, errors.FileInvalidWindowsDriveLetterHost, false); err != nil { return nil, err } state = statePath @@ -497,7 +511,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride } else { host, err := p.parseHost(url, p, buffer.String(), !url.IsSpecialScheme()) if err != nil { - return p.handleFailure(url, errors.FailIllegalHost, err) + return url, err } if host == "localhost" { host = "" @@ -515,7 +529,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride case statePathStart: if url.IsSpecialScheme() && !p.opts.skipTrailingSlashNormalization { if r == '\\' { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil { return nil, err } } @@ -543,7 +557,7 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride (!stateOverridden && (r == '?' || r == '#')) { if url.isSpecialSchemeAndBackslash(r) { - if err := p.handleError(url, errors.IllegalSlashes); err != nil { + if err := p.handleError(url, errors.InvalidReverseSolidus, false); err != nil { return nil, err } } @@ -581,13 +595,13 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride } } else { if !isURLCodePoint(r) && r != '%' { - if err := p.handleError(url, errors.IllegalCodePoint); err != nil { + if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil { return nil, err } } - invalidPercentEncoding := input.remainingIsInvalidPercentEncoded() + invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded() if invalidPercentEncoding { - if err := p.handleError(url, errors.InvalidPercentEncoding); err != nil { + if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil { return nil, err } } @@ -608,13 +622,13 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride buffer.Reset() } else if !input.eof { if !isURLCodePoint(r) && r != '%' { - if err := p.handleError(url, errors.IllegalCodePoint); err != nil { + if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil { return nil, err } } - invalidPercentEncoding := input.remainingIsInvalidPercentEncoded() + invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded() if invalidPercentEncoding { - if err := p.handleError(url, errors.InvalidPercentEncoding); err != nil { + if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil { return nil, err } buffer.WriteString(p.percentEncodeInvalidRune(r, C0PercentEncodeSet)) @@ -631,12 +645,13 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride buffer.Reset() } else if !input.eof { if !isURLCodePoint(r) && r != '%' { - if err := p.handleError(url, errors.IllegalCodePoint); err != nil { + if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil { return nil, err } } - if input.remainingIsInvalidPercentEncoded() { - if err := p.handleError(url, errors.InvalidPercentEncoding); err != nil { + invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded() + if invalidPercentEncoding { + if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil { return nil, err } } @@ -651,12 +666,13 @@ func (p *parser) basicParser(urlOrRef string, base *Url, url *Url, stateOverride case stateFragment: if !input.eof { if !isURLCodePoint(r) && r != '%' { - if err := p.handleError(url, errors.IllegalCodePoint); err != nil { + if err := p.handleError(url, errors.InvalidURLUnit, false); err != nil { return nil, err } } - if input.remainingIsInvalidPercentEncoded() { - if err := p.handleError(url, errors.InvalidPercentEncoding); err != nil { + invalidPercentEncoding, d := input.remainingIsInvalidPercentEncoded() + if invalidPercentEncoding { + if err := p.handleErrorWithDescription(url, errors.InvalidURLUnit, false, d); err != nil { return nil, err } }