Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(url): fix host extra trailing slash issue #1173

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions src/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,25 @@ impl PyUrl {
};
let mut url = format!("{scheme}://{url_host}");
if let Some(path) = path {
url.push('/');
url.push_str(path);
if !url.ends_with('/') {
url.push('/');
}
Comment on lines +180 to +182
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like we should expect that url never ends with /, as that's technically not a valid hostname? I wonder if there's a case to raise an error if url_host contains /?

if path.starts_with('/') {
url.push_str(path.trim_start_matches('/'));
} else {
url.push_str(path);
}
Comment on lines +183 to +187
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think trimming the path is too aggressive, because ///foo as a path is technically meaningful and

Suggested change
if path.starts_with('/') {
url.push_str(path.trim_start_matches('/'));
} else {
url.push_str(path);
}
if !path.starts_with('/') {
url.push('/');
}

}
if let Some(query) = query {
url.push('?');
if !query.starts_with('?') {
url.push('?');
}
Comment on lines +190 to +192
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a change in functionality which is slightly different, so perhaps needs to be split out into a separate PR. Similar to my comment about hostname, there should probably be some validation here.

url.push_str(query);
}
if let Some(fragment) = fragment {
url.push('#');
if !fragment.starts_with('#') {
url.push('#');
}
url.push_str(fragment);
}
cls.call1((url,))
Expand Down Expand Up @@ -405,15 +415,25 @@ impl PyMultiHostUrl {
};

if let Some(path) = path {
url.push('/');
url.push_str(path);
if !url.ends_with('/') {
url.push('/');
}
if path.starts_with('/') {
url.push_str(path.trim_start_matches('/'));
} else {
url.push_str(path);
}
}
if let Some(query) = query {
url.push('?');
if !query.starts_with('?') {
url.push('?');
}
url.push_str(query);
}
if let Some(fragment) = fragment {
url.push('#');
if !fragment.starts_with('#') {
url.push('#');
}
Comment on lines 417 to +436
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comments as above, I guess.

url.push_str(fragment);
}
cls.call1((url,))
Expand Down
75 changes: 58 additions & 17 deletions tests/validators/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
from ..conftest import Err, PyAndJson


def test_url_ok(py_and_json: PyAndJson):
v = py_and_json(core_schema.url_schema())
url = v.validate_test('https://example.com/foo/bar?baz=qux#quux')
def assert_example_url(url: Url):
# example URL in question 'https://example.com/foo/bar?baz=qux#quux'

assert isinstance(url, Url)
assert str(url) == 'https://example.com/foo/bar?baz=qux#quux'
Expand All @@ -30,23 +29,51 @@ def test_url_ok(py_and_json: PyAndJson):
assert url.port == 443


def test_url_ok(py_and_json: PyAndJson):
v = py_and_json(core_schema.url_schema())
url = v.validate_test('https://example.com/foo/bar?baz=qux#quux')

assert_example_url(url)


def test_url_from_constructor_ok():
url = Url('https://example.com/foo/bar?baz=qux#quux')

assert isinstance(url, Url)
assert str(url) == 'https://example.com/foo/bar?baz=qux#quux'
assert repr(url) == "Url('https://example.com/foo/bar?baz=qux#quux')"
assert url.unicode_string() == 'https://example.com/foo/bar?baz=qux#quux'
assert url.scheme == 'https'
assert url.host == 'example.com'
assert url.unicode_host() == 'example.com'
assert url.path == '/foo/bar'
assert url.query == 'baz=qux'
assert url.query_params() == [('baz', 'qux')]
assert url.fragment == 'quux'
assert url.username is None
assert url.password is None
assert url.port == 443
assert_example_url(url)


def test_url_from_build_ok():
# 1) no host trailing slash/no path leading slash in the input
url = Url.build(scheme='https', host='example.com', path='foo/bar', query='baz=qux', fragment='quux')
assert_example_url(url)

# 2) no host trailing slash/with path leading slash in the input
url = Url.build(scheme='https', host='example.com', path='/foo/bar', query='baz=qux', fragment='quux')
assert_example_url(url)

# 3) with host trailing slash/no path leading slash in the input
url = Url.build(scheme='https', host='example.com/', path='foo/bar', query='baz=qux', fragment='quux')
assert_example_url(url)

# 4) with host trailing slash/with path leading slash in the input
url = Url.build(scheme='https', host='example.com/', path='/foo/bar', query='baz=qux', fragment='quux')
assert_example_url(url)
Comment on lines +54 to +60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I would be tempted to argue that these cases should never be valid, because the host isn't valid? Maybe that needs to wait until V3 given it's technically breaking? Or is it kinda broken anyway already because of the unconditional / addition?


# 5) query no leading question mark
url = Url.build(scheme='https', host='example.com', path='foo/bar', query='baz=qux', fragment='quux')
assert_example_url(url)

# 6) query with leading question mark
url = Url.build(scheme='https', host='example.com', path='foo/bar', query='?baz=qux', fragment='quux')
assert_example_url(url)

# 7) fragment no leading hash
url = Url.build(scheme='https', host='example.com', path='foo/bar', query='baz=qux', fragment='quux')
assert_example_url(url)

# 8) fragment with leading hash
url = Url.build(scheme='https', host='example.com', path='foo/bar', query='baz=qux', fragment='#quux')
assert_example_url(url)


@pytest.fixture(scope='module', name='url_validator')
Expand Down Expand Up @@ -1232,6 +1259,20 @@ def test_multi_url_build() -> None:
assert url == MultiHostUrl('postgresql://testuser:testpassword@127.0.0.1:5432/database?sslmode=require#test')
assert str(url) == 'postgresql://testuser:testpassword@127.0.0.1:5432/database?sslmode=require#test'

# assert that `build` builds correctly with leading slash in path, leading question mark in query and leading hash in fragment
url = MultiHostUrl.build(
scheme='postgresql',
username='testuser',
password='testpassword',
host='127.0.0.1',
port=5432,
path='/database',
query='?sslmode=require',
fragment='#test',
)
assert url == MultiHostUrl('postgresql://testuser:testpassword@127.0.0.1:5432/database?sslmode=require#test')
assert str(url) == 'postgresql://testuser:testpassword@127.0.0.1:5432/database?sslmode=require#test'


@pytest.mark.parametrize('field', ['host', 'password', 'username', 'port'])
def test_multi_url_build_hosts_set_with_single_value(field) -> None:
Expand Down
Loading