Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed concurrent-ks-tree-syncs by making disttree ID repo-specific. #2851

Merged
merged 1 commit into from
Nov 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES/2278.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed concurrent, overlapping syncs of subrepos by making each subrepo unique to the repository being synced.

This change is transparent to end-users.
26 changes: 15 additions & 11 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,24 +484,16 @@ def is_subrepo(directory):

with tempfile.TemporaryDirectory(dir="."):
remote_url = fetch_remote_url(remote, url)
sync_details = get_sync_details(remote, remote_url, sync_policy, repository)

repo_sync_config[PRIMARY_REPO] = {
"should_skip": should_optimize_sync(sync_details, repository.last_sync_details),
"sync_details": sync_details,
"url": remote_url,
"repo": repository,
}

# Find and set up to deal with any subtrees
treeinfo = get_treeinfo_data(remote, remote_url)

if treeinfo:
treeinfo["repositories"] = {}
for repodata in set(treeinfo["download"]["repodatas"]):
if repodata == DIST_TREE_MAIN_REPO_PATH:
treeinfo["repositories"].update({repodata: None})
continue
name = f"{repodata}-{treeinfo['hash']}"
name = f"{repodata}-{treeinfo['hash']}-{repository.pulp_id}"
sub_repo, created = RpmRepository.objects.get_or_create(name=name, user_hidden=True)
if created:
sub_repo.save()
Expand All @@ -527,6 +519,15 @@ def is_subrepo(directory):
"repo": sub_repo,
}

# Set up to deal with the primary repository
sync_details = get_sync_details(remote, remote_url, sync_policy, repository)
repo_sync_config[PRIMARY_REPO] = {
"should_skip": should_optimize_sync(sync_details, repository.last_sync_details),
"sync_details": sync_details,
"url": remote_url,
"repo": repository,
}

# If all repos are exactly the same, we should skip all further processing, even in
# metadata-mirror mode
if optimize and all([config["should_skip"] for config in repo_sync_config.values()]):
Expand All @@ -541,6 +542,8 @@ def is_subrepo(directory):
repo_sync_results = {}

# If some repos need to be synced and others do not, we go through them all.
# items() iterates in insertion order, so the PRIMARY entry must be the LAST one added to
# repo_sync_config and therefore the LAST thing we process here; otherwise autopublish
# will fail to find any subrepo content.
for directory, repo_config in repo_sync_config.items():
repo = repo_config["repo"]
# If metadata_mirroring is enabled we cannot skip any syncs, because the generated
Expand Down Expand Up @@ -878,7 +881,8 @@ async def parse_distribution_tree(self):
)
d_artifacts.append(da)

self.treeinfo["distribution_tree"]["digest"] = self.treeinfo["hash"]
tree_digest = f'{self.treeinfo["hash"]}-{self.repository.pulp_id}'
self.treeinfo["distribution_tree"]["digest"] = tree_digest
distribution_tree = DistributionTree(**self.treeinfo["distribution_tree"])
dc = DeclarativeContent(content=distribution_tree, d_artifacts=d_artifacts)
dc.extra_data = self.treeinfo
Expand Down