From 4d3e9e9a86202784802bfd961fd374e72eee6da2 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Tue, 11 Jul 2023 11:37:39 +0200 Subject: [PATCH] Fix trailing spaces in pathname setters --- url/src/lib.rs | 14 +++++---- url/src/parser.rs | 15 +++++----- url/src/path_segments.rs | 2 +- url/src/quirks.rs | 6 ++-- url/tests/setters_tests.json | 56 +++++++++++++++++++++++++++++++++++- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/url/src/lib.rs b/url/src/lib.rs index ad3c89001..1018aedc4 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -1486,7 +1486,7 @@ impl Url { if let Some(input) = fragment { self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); self.serialization.push('#'); - self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input))) + self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input))) } else { self.fragment_start = None; self.strip_trailing_spaces_from_opaque_path(); @@ -1549,7 +1549,7 @@ impl Url { parser.parse_query( scheme_type, scheme_end, - parser::Input::trim_tab_and_newlines(input, vfn), + parser::Input::new_trim_tab_and_newlines(input, vfn), ) }); } else { @@ -1670,10 +1670,14 @@ impl Url { parser.serialization.push_str("%2F"); path = &path[1..]; } - parser.parse_cannot_be_a_base_path(parser::Input::new(path)); + parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path)); } else { let mut has_host = true; // FIXME - parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); + parser.parse_path_start( + scheme_type, + &mut has_host, + parser::Input::new_no_trim(path), + ); } }); self.restore_after_path(old_after_path_pos, &after_path); @@ -2343,7 +2347,7 @@ impl Url { #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)] pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { let mut parser = Parser::for_setter(String::new()); - let remaining = parser.parse_scheme(parser::Input::new(scheme))?; + let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?; let new_scheme_type = SchemeType::from(&parser.serialization); let old_scheme_type = SchemeType::from(self.scheme()); // If url’s scheme is a special scheme and buffer is not a special scheme, then return. diff --git a/url/src/parser.rs b/url/src/parser.rs index 765cc027c..d5ae8314c 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -184,17 +184,13 @@ pub struct Input<'i> { } impl<'i> Input<'i> { - pub fn new(input: &'i str) -> Self { - Input::with_log(input, None) - } - - pub fn no_trim(input: &'i str) -> Self { + pub fn new_no_trim(input: &'i str) -> Self { Input { chars: input.chars(), } } - pub fn trim_tab_and_newlines( + pub fn new_trim_tab_and_newlines( original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>, ) -> Self { @@ -212,7 +208,10 @@ impl<'i> Input<'i> { } } - pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self { + pub fn new_trim_c0_control_and_space( + original_input: &'i str, + vfn: Option<&dyn Fn(SyntaxViolation)>, + ) -> Self { let input = original_input.trim_matches(c0_control_or_space); if let Some(vfn) = vfn { if input.len() < original_input.len() { @@ -362,7 +361,7 @@ impl<'a> Parser<'a> { /// https://url.spec.whatwg.org/#concept-basic-url-parser pub fn parse_url(mut self, input: &str) -> ParseResult { - let input = Input::with_log(input, self.violation_fn); + let input = Input::new_trim_c0_control_and_space(input, self.violation_fn); if let Ok(remaining) = self.parse_scheme(input.clone()) { return self.parse_with_scheme(remaining); } diff --git a/url/src/path_segments.rs b/url/src/path_segments.rs index 29afc1e7e..d8a78d785 100644 --- a/url/src/path_segments.rs +++ b/url/src/path_segments.rs @@ -237,7 +237,7 @@ impl<'a> PathSegmentsMut<'a> { scheme_type, &mut has_host, path_start, - parser::Input::new(segment), + parser::Input::new_no_trim(segment), ); } }); diff --git a/url/src/quirks.rs b/url/src/quirks.rs index 0674ebb62..3a99e22cf 100644 --- a/url/src/quirks.rs +++ b/url/src/quirks.rs @@ -152,7 +152,7 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { } // Host parsing rules are strict, // We don't want to trim the input - let input = Input::no_trim(new_host); + let input = Input::new_no_trim(new_host); let host; let opt_port; { @@ -203,7 +203,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { return Err(()); } // Host parsing rules are strict we don't want to trim the input - let input = Input::no_trim(new_hostname); + let input = Input::new_no_trim(new_hostname); let scheme_type = SchemeType::from(url.scheme()); if scheme_type == SchemeType::File && new_hostname.is_empty() { url.set_host_internal(Host::Domain(String::new()), None); @@ -249,7 +249,7 @@ pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { return Err(()); } result = Parser::parse_port( - Input::new(new_port), + Input::new_no_trim(new_port), || default_port(scheme), Context::Setter, ) diff --git a/url/tests/setters_tests.json b/url/tests/setters_tests.json index 4280032a2..6a922af92 100644 --- a/url/tests/setters_tests.json +++ b/url/tests/setters_tests.json @@ -1,6 +1,6 @@ { "comment": [ - "AS OF https://github.com/jsdom/whatwg-url/commit/35f04dfd3048cf6362f4398745bb13375c5020c2", + "AS OF https://github.com/web-platform-tests/wpt/blob/09b34ae130cd946e111cd427d6bcf2d6f257aed8/url/resources/setters_tests.json, but only passing tests", "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", "", "This file contains a JSON object.", @@ -1653,6 +1653,24 @@ "href": "file:///", "pathname": "/" } + }, + { + "comment": "Trailing space should be encoded", + "href": "http://example.net", + "new_value": " ", + "expected": { + "href": "http://example.net/%20", + "pathname": "/%20" + } + }, + { + "comment": "Trailing C0 control should be encoded", + "href": "http://example.net", + "new_value": "\u0000", + "expected": { + "href": "http://example.net/%00", + "pathname": "/%00" + } } ], "search": [ @@ -1737,6 +1755,24 @@ "href": "http://example.net/?%c3%89t%C3%A9", "search": "?%c3%89t%C3%A9" } + }, + { + "comment": "Trailing space should be encoded", + "href": "http://example.net", + "new_value": " ", + "expected": { + "href": "http://example.net/?%20", + "search": "?%20" + } + }, + { + "comment": "Trailing C0 control should be encoded", + "href": "http://example.net", + "new_value": "\u0000", + "expected": { + "href": "http://example.net/?%00", + "search": "?%00" + } } ], "hash": [ @@ -1871,6 +1907,24 @@ "href": "javascript:alert(1)#castle", "hash": "#castle" } + }, + { + "comment": "Trailing space should be encoded", + "href": "http://example.net", + "new_value": " ", + "expected": { + "href": "http://example.net/#%20", + "hash": "#%20" + } + }, + { + "comment": "Trailing C0 control should be encoded", + "href": "http://example.net", + "new_value": "\u0000", + "expected": { + "href": "http://example.net/#%00", + "hash": "#%00" + } } ] }