Skip to content

Commit

Permalink
Add URI::Generic#decoded_#{user,password}
Browse files Browse the repository at this point in the history
URI::Generic#{user,password} return the encoded values, which are
not that useful if you want to do authentication with them.
Automatic decoding by default would break backwards compatibility.
Optional automatic decoding via a keyword to URI.parse would
require threading the option through at least 3 other methods, and
would make semantics confusing (user= takes encoded or unencoded
password?) or require more work.  Thus, adding this as a separate
method seemed the simplest approach.

Unfortunately, URI lacks a method for correct decoding.  Unlike in
www form components, + in earlier parts of the URI such as the
userinfo section is treated verbatim and not as an encoded space.
Add URI.#{en,de}code_uri_component methods, which are almost the
same as URI.#{en,de}code_www_form_component, but without the
special SP => + handling.

Implements [Feature #9045]
  • Loading branch information
jeremyevans committed May 12, 2022
1 parent 92352e6 commit 16cfc4e
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 10 deletions.
41 changes: 33 additions & 8 deletions lib/uri/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ def self.regexp(schemes = nil)
# Fill the encode table with one entry per byte value 0..255: the key is the
# single-byte string and the value is its "%XX" escape (two upper-case hex
# digits). The unary minus returns a frozen, possibly deduplicated String.
256.times do |i|
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
end
# Snapshot the table *before* the SP override below, so that the URI-component
# table keeps mapping SP to "%20".
TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
# Only the www-form table represents a space as "+".
TBLENCWWWCOMP_[' '] = '+'
TBLENCWWWCOMP_.freeze
TBLDECWWWCOMP_ = {} # :nodoc:
Expand All @@ -320,6 +321,33 @@ def self.regexp(schemes = nil)
#
# See URI.decode_www_form_component, URI.encode_www_form.
def self.encode_www_form_component(str, enc=nil)
# Escapes every byte outside "*", "-", ".", 0-9, A-Z, "_" and a-z using the
# www-form table, in which SP maps to "+" rather than "%20".
_encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
end

# Decodes given +str+ of URL-encoded form data.
#
# This decodes + to SP, in addition to replacing %XX escapes.
# The returned string is tagged with +enc+ (UTF-8 by default).
#
# Raises ArgumentError if +str+ contains a "%" that is not followed by two
# hexadecimal digits.
#
# See URI.encode_www_form_component, URI.decode_www_form.
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
_decode_uri_component(/\+|%\h\h/, str, enc)
end

# Encodes +str+ using URL encoding
#
# This encodes SP to %20 instead of +.
#
# Every byte outside "*", "-", ".", 0-9, A-Z, "_" and a-z is replaced by its
# %XX escape; this is the same unescaped set as
# URI.encode_www_form_component, only the treatment of SP differs.
# +enc+ is forwarded unchanged to the shared encoding helper.
#
# See URI.decode_uri_component, URI.encode_www_form_component.
def self.encode_uri_component(str, enc=nil)
_encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
end

# Decodes given +str+ of URL-encoded data.
#
# This does not decode + to SP; only %XX escapes are replaced.
# The returned string is tagged with +enc+ (UTF-8 by default).
#
# Raises ArgumentError if +str+ contains a "%" that is not followed by two
# hexadecimal digits.
#
# See URI.encode_uri_component, URI.decode_www_form_component.
def self.decode_uri_component(str, enc=Encoding::UTF_8)
_decode_uri_component(/%\h\h/, str, enc)
end

def self._encode_uri_component(regexp, table, str, enc)
str = str.to_s.dup
if str.encoding != Encoding::ASCII_8BIT
if enc && enc != Encoding::ASCII_8BIT
Expand All @@ -328,19 +356,16 @@ def self.encode_www_form_component(str, enc=nil)
end
str.force_encoding(Encoding::ASCII_8BIT)
end
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
str.gsub!(regexp, table)
str.force_encoding(Encoding::US_ASCII)
end
private_class_method :_encode_uri_component

# Decodes given +str+ of URL-encoded form data.
#
# This decodes + to SP.
#
# See URI.encode_www_form_component, URI.decode_www_form.
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
# Shared implementation behind the public decode_*_component methods.
#
# +regexp+ selects the substrings to replace (the callers pass either
# /\+|%\h\h/ or /%\h\h/); each match is looked up in TBLDECWWWCOMP_ on a
# binary copy of +str+, and the result is tagged with +enc+.
#
# Raises ArgumentError when +str+ contains a "%" that is not followed by two
# hexadecimal digits.
def self._decode_uri_component(regexp, str, enc)
raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
# NOTE(review): the value of the next line is discarded; it looks like the
# pre-refactor form (hard-coded regexp) of the line after it -- confirm and drop.
str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
end
private_class_method :_decode_uri_component

# Generates URL-encoded form data from given +enum+.
#
Expand Down
14 changes: 12 additions & 2 deletions lib/uri/generic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -564,16 +564,26 @@ def userinfo
end
end

# Returns the user component.
# Returns the user component (without URI decoding).
#
# This is the stored @user value as-is, so percent-escapes such as "%40"
# are still present; use #decoded_user for the decoded form.
def user
@user
end

# Returns the password component.
# Returns the password component (without URI decoding).
#
# This is the stored @password value as-is, so percent-escapes such as "%40"
# are still present; use #decoded_password for the decoded form.
def password
@password
end

# Returns the user component with its %XX escapes decoded
# (via URI.decode_uri_component, so "+" is left untouched),
# or +nil+ when no user is set.
def decoded_user
  return unless @user

  URI.decode_uri_component(@user)
end

# Returns the password component with its %XX escapes decoded
# (via URI.decode_uri_component, so "+" is left untouched),
# or +nil+ when no password is set.
def decoded_password
  return unless @password

  URI.decode_uri_component(@password)
end

#
# Checks the host +v+ component for RFC2396 compliance
# and against the URI::Parser Regexp for :HOST.
Expand Down
52 changes: 52 additions & 0 deletions test/uri/test_common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,58 @@ def test_decode_www_form_component
assert_nothing_raised(ArgumentError){URI.decode_www_form_component("x"*(1024*1024))}
end

# Exercises URI.encode_uri_component: the unescaped character set, input in
# non-UTF-8 encodings, the optional target encoding, and invalid byte
# sequences.
def test_encode_uri_component
# ASCII: only "*", "-", ".", digits, letters and "_" stay literal; SP becomes
# %20 and "+" becomes %2B.
assert_equal("%00%20%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
URI.encode_uri_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
# No target encoding given: the input's bytes are escaped as they are,
# whatever encoding the string is tagged with.
assert_equal("%95A", URI.encode_uri_component(
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
assert_equal("0B", URI.encode_uri_component(
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
assert_equal("%1B%24B%24%22%1B%28B", URI.encode_uri_component(
"\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))

# UTF-8 input with an explicit target encoding: ASCII-8BIT keeps the UTF-8
# bytes, other encodings see the transcoded bytes.
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042", Encoding::ASCII_8BIT))
assert_equal("%82%A0", URI.encode_uri_component(
"\u3042", Encoding::Windows_31J))
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042", Encoding::UTF_8))

# Shift_JIS input with an explicit target encoding.
assert_equal("%82%A0", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::ASCII_8BIT))
assert_equal("%A4%A2", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::EUC_JP))
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::UTF_8))
assert_equal("B0", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::UTF_16LE))
# A character the target encoding cannot represent comes out as an escaped
# numeric character reference ("&#730;").
assert_equal("%26%23730%3B", URI.encode_uri_component(
"\u02DA", Encoding::WINDOWS_1252))

# invalid byte sequences are expected to come out as U+FFFD (%EF%BF%BD)
assert_equal("%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
"\xE3\x81\xFF", "utf-8"))
assert_equal("%E6%9F%8A%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
"\x95\x41\xff\xff".force_encoding(Encoding::Shift_JIS), "utf-8"))
end

# Exercises URI.decode_uri_component: %XX decoding, verbatim "+", the result
# encoding, and rejection of malformed escapes.
def test_decode_uri_component
# "%20" decodes to SP while a literal "+" in the input stays "+".
assert_equal(" +!\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
URI.decode_uri_component(
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
# The second argument selects the encoding the result is tagged with.
assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
URI.decode_uri_component("%A1%A2", "EUC-JP"))
# Raw non-ASCII bytes pass through unchanged next to decoded escapes.
assert_equal("\xE3\x81\x82\xE3\x81\x82".force_encoding("UTF-8"),
URI.decode_uri_component("\xE3\x81\x82%E3%81%82".force_encoding("UTF-8")))

# A "%" that is not followed by two hex digits is rejected.
assert_raise(ArgumentError){URI.decode_uri_component("%")}
assert_raise(ArgumentError){URI.decode_uri_component("%a")}
assert_raise(ArgumentError){URI.decode_uri_component("x%a_")}
# A 1 MiB input without escapes must not raise.
assert_nothing_raised(ArgumentError){URI.decode_uri_component("x"*(1024*1024))}
end

def test_encode_www_form
assert_equal("a=1", URI.encode_www_form("a" => "1"))
assert_equal("a=1", URI.encode_www_form(a: 1))
Expand Down
9 changes: 9 additions & 0 deletions test/uri/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ def test_parse_query_pct_encoded
assert_raise(URI::InvalidURIError) { URI.parse('https://www.example.com/search?q=%XX') }
end

# The raw userinfo readers keep their percent-escapes, while the decoded_*
# readers resolve them ("%40" => "@", "%25" => "%").
def test_parse_auth
  parsed = URI.parse("http://al%40ice:p%40s%25sword@example.com/dir%2Fname/subdir?foo=bar%40example.com")

  {
    user: "al%40ice",
    password: "p%40s%25sword",
    decoded_user: "al@ice",
    decoded_password: "p@s%sword",
  }.each do |reader, expected|
    assert_equal expected, parsed.public_send(reader)
  end
end

def test_raise_bad_uri_for_integer
assert_raise(URI::InvalidURIError) do
URI.parse(1)
Expand Down

0 comments on commit 16cfc4e

Please sign in to comment.