Skip to content

Commit

Permalink
Fixed concurrent-ks-tree-syncs by making disttree ID repo-specific.
Browse files Browse the repository at this point in the history
DistributionTree digest and subrepo-names now both end with the
pulp-id of the "owning" Repository, making them unique to that
repo and therefore protected against concurrent updates from
anything that is changing that Repository.

Addon/Variant/Image are transitively made unique by virtue of
having their DistributionTree be part of their unique-together
constraint.

Sub-repo **content** (e.g., Packages et al.) is de-duplicated via
its existing uniqueness constraints.

The end result is a minor increase in Content objects (i.e.,
DistTrees/Addons/Images/Variants that used to have only one
instance are now one-per-containing-repo), and a small impact
on subrepo-syncing (since subrepos that previously existed only
once will now each have a first-sync that would have been
skipped). Content will continue to be sync'd only once.

fixes #2278.
[nocoverage]
  • Loading branch information
ggainey committed Nov 7, 2022
1 parent 3475bc3 commit 52a9acc
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 11 deletions.
3 changes: 3 additions & 0 deletions CHANGES/2278.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed concurrent-overlapping-sync of subrepos by making them repository-unique.

This change is transparent to end-users.
26 changes: 15 additions & 11 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,24 +484,16 @@ def is_subrepo(directory):

with tempfile.TemporaryDirectory(dir="."):
remote_url = fetch_remote_url(remote, url)
sync_details = get_sync_details(remote, remote_url, sync_policy, repository)

repo_sync_config[PRIMARY_REPO] = {
"should_skip": should_optimize_sync(sync_details, repository.last_sync_details),
"sync_details": sync_details,
"url": remote_url,
"repo": repository,
}

# Find and set up to deal with any subtrees
treeinfo = get_treeinfo_data(remote, remote_url)

if treeinfo:
treeinfo["repositories"] = {}
for repodata in set(treeinfo["download"]["repodatas"]):
if repodata == DIST_TREE_MAIN_REPO_PATH:
treeinfo["repositories"].update({repodata: None})
continue
name = f"{repodata}-{treeinfo['hash']}"
name = f"{repodata}-{treeinfo['hash']}-{repository.pulp_id}"
sub_repo, created = RpmRepository.objects.get_or_create(name=name, user_hidden=True)
if created:
sub_repo.save()
Expand All @@ -527,6 +519,15 @@ def is_subrepo(directory):
"repo": sub_repo,
}

# Set up to deal with the primary repository
sync_details = get_sync_details(remote, remote_url, sync_policy, repository)
repo_sync_config[PRIMARY_REPO] = {
"should_skip": should_optimize_sync(sync_details, repository.last_sync_details),
"sync_details": sync_details,
"url": remote_url,
"repo": repository,
}

# If all repos are exactly the same, we should skip all further processing, even in
# metadata-mirror mode
if optimize and all([config["should_skip"] for config in repo_sync_config.values()]):
Expand All @@ -541,6 +542,8 @@ def is_subrepo(directory):
repo_sync_results = {}

# If some repos need to be synced and others do not, we go through them all
# items() returns in insertion-order - make sure PRIMARY is the LAST thing we process
# here, or autopublish will fail to find any subrepo-content.
for directory, repo_config in repo_sync_config.items():
repo = repo_config["repo"]
# If metadata_mirroring is enabled we cannot skip any syncs, because the generated
Expand Down Expand Up @@ -878,7 +881,8 @@ async def parse_distribution_tree(self):
)
d_artifacts.append(da)

self.treeinfo["distribution_tree"]["digest"] = self.treeinfo["hash"]
tree_digest = f'{self.treeinfo["hash"]}-{self.repository.pulp_id}'
self.treeinfo["distribution_tree"]["digest"] = tree_digest
distribution_tree = DistributionTree(**self.treeinfo["distribution_tree"])
dc = DeclarativeContent(content=distribution_tree, d_artifacts=d_artifacts)
dc.extra_data = self.treeinfo
Expand Down

0 comments on commit 52a9acc

Please sign in to comment.