Skip to content

Commit

Permalink
new updater: Clean up url handling
Browse files Browse the repository at this point in the history
* Make sure all base urls (prefixes) end in a slash
* Add documentation to get_one_valid_targetinfo(): That is the one place
  where the API accepts ill-defined "paths" from the caller
* Remove checks from download url handling: we control both the base url
  and the relative path so there should be no surprises here.

Signed-off-by: Jussi Kukkonen <jkukkonen@vmware.com>
  • Loading branch information
Jussi Kukkonen committed May 5, 2021
1 parent 3a02583 commit ab210b4
Showing 1 changed file with 26 additions and 30 deletions.
56 changes: 26 additions & 30 deletions tuf/client_rework/updater_rework.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ def __init__(
target_base_url: Optional[str] = None,
fetcher: Optional[FetcherInterface] = None,
):

self._repository_name = repository_name
self._metadata_base_url = metadata_base_url
self._target_base_url = target_base_url
self._metadata_base_url = _ensure_trailing_slash(metadata_base_url)
if target_base_url is None:
self._target_base_url = None
else:
self._target_base_url = _ensure_trailing_slash(target_base_url)
self._consistent_snapshot = False
self._metadata = {}

Expand Down Expand Up @@ -77,13 +79,18 @@ def refresh(self) -> None:
self._load_snapshot()
self._load_targets("targets", "root")

def get_one_valid_targetinfo(self, filename: str) -> Dict:
def get_one_valid_targetinfo(self, target_path: str) -> Dict:
"""
Returns the target information for a specific file identified by its
file path. This target method also downloads the metadata of updated
targets.
Returns the target information for a target identified by target_path.
As a side-effect this method downloads all the metadata it needs to
return the target information.
Args:
target_path: A path-relative-URL string
(https://url.spec.whatwg.org/#path-relative-url-string)
"""
return self._preorder_depth_first_walk(filename)
return self._preorder_depth_first_walk(target_path)

@staticmethod
def updated_targets(targets: Dict, destination_directory: str) -> Dict:
Expand Down Expand Up @@ -151,10 +158,12 @@ def download_target(
"target_base_url must be set in either download_target() or "
"constructor"
)
elif target_base_url is None:
if target_base_url is None:
target_base_url = self._target_base_url
else:
target_base_url = _ensure_trailing_slash(target_base_url)

full_url = _build_full_url(target_base_url, targetinfo["filepath"])
full_url = parse.urljoin(target_base_url, targetinfo["filepath"])

with download.download_file(
full_url, targetinfo["fileinfo"]["length"], self._fetcher
Expand Down Expand Up @@ -207,7 +216,7 @@ def _load_root(self) -> None:

for next_version in range(lower_bound, upper_bound):
try:
root_url = _build_full_url(
root_url = parse.urljoin(
self._metadata_base_url, f"{next_version}.root.json"
)
# For each version of root iterate over the list of mirrors
Expand Down Expand Up @@ -271,9 +280,7 @@ def _load_timestamp(self) -> None:
TODO
"""
# TODO Check if timestamp exists locally
timestamp_url = _build_full_url(
self._metadata_base_url, "timestamp.json"
)
timestamp_url = parse.urljoin(self._metadata_base_url, "timestamp.json")
data = download.download_bytes(
timestamp_url,
settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH,
Expand Down Expand Up @@ -301,7 +308,7 @@ def _load_snapshot(self) -> None:
# version = None

# TODO: Check if exists locally
snapshot_url = _build_full_url(self._metadata_base_url, "snapshot.json")
snapshot_url = parse.urljoin(self._metadata_base_url, "snapshot.json")
data = download.download_bytes(
snapshot_url,
length,
Expand Down Expand Up @@ -331,7 +338,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None:

# TODO: Check if exists locally

targets_url = _build_full_url(
targets_url = parse.urljoin(
self._metadata_base_url, f"{targets_role}.json"
)
data = download.download_bytes(
Expand Down Expand Up @@ -761,17 +768,6 @@ def _get_filepath_hash(target_filepath, hash_function="sha256"):
return target_filepath_hash


def _build_full_url(base_url, filepath):
"""
Build a full “absolute" URL by combining a base URL with
a relative file path.
"""
# Are these steps enough? Or too much? Is this the right place?
filepath = parse.quote(filepath)
# Assuming that base_url ends with a '/' character, otherwise parse.urljoin
# omits (correcly) the last part of the base URL path
full_url = parse.urljoin(base_url, filepath)
# Avoid windows path separators. Should we keep this check? Or correct
# paths should be required from the user?
# full_url.replace("\\", "/")
return full_url
def _ensure_trailing_slash(url: str):
"""Return url guaranteed to end in a slash"""
return url if url.endswith("/") else f"{url}/"

0 comments on commit ab210b4

Please sign in to comment.