From 6cdcb452ce84cb8babd5301f251b28aab0ce4077 Mon Sep 17 00:00:00 2001 From: David Davis Date: Wed, 1 Nov 2023 15:13:29 -0400 Subject: [PATCH] Serve previously published artifacts for 3 days fixes #911 --- CHANGES/911.feature | 3 + .../migrations/0029_distributedpublication.py | 50 ++++++++++ pulp_deb/app/models/publication.py | 96 ++++++++++++++++++- .../functional/api/test_download_content.py | 34 +++++++ pulp_deb/tests/functional/conftest.py | 7 +- 5 files changed, 187 insertions(+), 3 deletions(-) create mode 100644 CHANGES/911.feature create mode 100644 pulp_deb/app/migrations/0029_distributedpublication.py diff --git a/CHANGES/911.feature b/CHANGES/911.feature new file mode 100644 index 000000000..f9c555fa0 --- /dev/null +++ b/CHANGES/911.feature @@ -0,0 +1,3 @@ +Added feature to serve published artifacts from previous publications for 3 days. +This fulfills the apt-by-hash/acquire-by-hash spec by allowing by-hash files to be cached for a +period of 3 days. diff --git a/pulp_deb/app/migrations/0029_distributedpublication.py b/pulp_deb/app/migrations/0029_distributedpublication.py new file mode 100644 index 000000000..b2013be55 --- /dev/null +++ b/pulp_deb/app/migrations/0029_distributedpublication.py @@ -0,0 +1,50 @@ +# Generated by Django 4.2.2 on 2024-01-03 18:58 + +from django.db import migrations, models +import django.db.models.deletion +import django_lifecycle.mixins +import pulpcore.app.models.base + + +class Migration(migrations.Migration): + + dependencies = [ + ("core", "0114_remove_task_args_remove_task_kwargs"), + ("deb", "0028_sourcepackage_sourcepackagereleasecomponent_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="DistributedPublication", + fields=[ + ( + "pulp_id", + models.UUIDField( + default=pulpcore.app.models.base.pulp_uuid, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("pulp_created", models.DateTimeField(auto_now_add=True)), + ("pulp_last_updated", models.DateTimeField(auto_now=True, 
null=True)), + ("expires_at", models.DateTimeField(null=True)), + ( + "distribution", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="core.distribution" + ), + ), + ( + "publication", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="core.publication" + ), + ), + ], + options={ + "abstract": False, + }, + bases=(django_lifecycle.mixins.LifecycleModelMixin, models.Model), + ), + ] diff --git a/pulp_deb/app/models/publication.py b/pulp_deb/app/models/publication.py index 48fa20783..6cff53a5b 100644 --- a/pulp_deb/app/models/publication.py +++ b/pulp_deb/app/models/publication.py @@ -1,11 +1,40 @@ +from contextlib import suppress +from datetime import timedelta + from django.db import models +from django.utils import timezone +from django_lifecycle import hook, AFTER_CREATE, AFTER_UPDATE -from pulpcore.plugin.models import Publication, Distribution +from pulpcore.plugin.models import ( + BaseModel, + Distribution, + Publication, + PublishedArtifact, + RepositoryVersion, +) from pulp_deb.app.models.signing_service import AptReleaseSigningService BOOL_CHOICES = [(True, "yes"), (False, "no")] +PUBLICATION_CACHE_DURATION = timedelta(days=3) + + +def latest_publication(repo_pk): + """ + Find the latest publication for a repository. + + This function is based on the logic in pulpcore's content handler. 
+ + https://github.com/pulp/pulpcore/blob/3bfd35c76e29944b622d275be52c0d5ebbdfbf72/pulpcore/content/handler.py#L601-L607 + """ + versions = RepositoryVersion.objects.filter(repository=repo_pk) + with suppress(Publication.DoesNotExist): + return ( + Publication.objects.filter(repository_version__in=versions, complete=True) + .latest("repository_version", "pulp_created") + .cast() + ) class VerbatimPublication(Publication): @@ -17,6 +46,12 @@ class VerbatimPublication(Publication): TYPE = "verbatim-publication" + @hook(AFTER_UPDATE, when="complete", has_changed=True, is_now=True) + def set_distributed_publication(self): + for distro in AptDistribution.objects.filter(repository__pk=self.repository.pk): + if self == latest_publication(self.repository.pk): + DistributedPublication(distribution=distro, publication=self).save() + class Meta: default_related_name = "%(app_label)s_%(model_name)s" @@ -36,6 +71,12 @@ class AptPublication(Publication): AptReleaseSigningService, on_delete=models.PROTECT, null=True ) + @hook(AFTER_UPDATE, when="complete", has_changed=True, is_now=True) + def set_distributed_publication(self): + for distro in AptDistribution.objects.filter(repository__pk=self.repository.pk): + if self == latest_publication(self.repository.pk): + DistributedPublication(distribution=distro, publication=self).save() + class Meta: default_related_name = "%(app_label)s_%(model_name)s" @@ -48,5 +89,58 @@ class AptDistribution(Distribution): TYPE = "apt-distribution" SERVE_FROM_PUBLICATION = True + @hook(AFTER_CREATE) + @hook(AFTER_UPDATE, when="publication", has_changed=True, is_not=None) + @hook(AFTER_UPDATE, when="repository", has_changed=True, is_not=None) + def set_distributed_publication(self): + if self.publication: + DistributedPublication(distribution=self, publication=self.publication).save() + elif self.repository: + if publication := latest_publication(self.repository.pk): + DistributedPublication(distribution=self, publication=publication).save() + def 
content_handler(self, path): + recent_dp = self.distributedpublication_set.filter( + models.Q(expires_at__gte=timezone.now()) | models.Q(expires_at__isnull=True) + ).order_by("pulp_created") + pa = ( + PublishedArtifact.objects.filter( + relative_path=path, publication__distributedpublication__pk__in=recent_dp + ) + .order_by("-publication__distributedpublication__pulp_created") + .select_related( + "content_artifact", + "content_artifact__artifact", + ) + ).first() + + if pa: + return pa.content_artifact + return + class Meta: default_related_name = "%(app_label)s_%(model_name)s" + + +class DistributedPublication(BaseModel): + """ + Represents a history of distributed publications. + + This allows the content handler to serve a previous Publication's content for a set period of + time. + + When a new Publication is served by a Distribution, it creates a new DistributedPublication and + sets the expires_at field on any existing DistributedPublications. + """ + + distribution = models.ForeignKey(Distribution, on_delete=models.CASCADE) + publication = models.ForeignKey(Publication, on_delete=models.CASCADE) + expires_at = models.DateTimeField(null=True) + + @hook(AFTER_CREATE) + def cleanup(self): + """Set expires_at on any older DistributedPublication and cleanup any expired ones.""" + DistributedPublication.objects.filter(expires_at__lt=timezone.now()).delete() + DistributedPublication.objects.exclude(pk=self.pk).filter( + distribution=self.distribution, expires_at__isnull=True + ).update(expires_at=(timezone.now() + PUBLICATION_CACHE_DURATION)) diff --git a/pulp_deb/tests/functional/api/test_download_content.py b/pulp_deb/tests/functional/api/test_download_content.py index 6695ad82b..efcc58e64 100644 --- a/pulp_deb/tests/functional/api/test_download_content.py +++ b/pulp_deb/tests/functional/api/test_download_content.py @@ -169,3 +169,37 @@ def test_download_content( content = download_content_unit(distribution.base_path, unit_path[1]) 
pulp_hashes.append(hashlib.sha256(content).hexdigest()) assert fixtures_hashes == pulp_hashes + + +@pytest.mark.parallel +def test_download_cached_content( + deb_init_and_sync, + deb_distribution_factory, + deb_publication_factory, + deb_fixture_server, + download_content_unit, + http_get, + deb_get_content_types, + deb_modify_repository, +): + """Verify that previously published content can still be downloaded.""" + # Create/sync a repo and then a distro + repo, _ = deb_init_and_sync() + distribution = deb_distribution_factory(repository=repo) + deb_publication_factory(repo, structured=True, simple=True) + + # Find a random package and get its hash digest + package_content = deb_get_content_types("apt_package_api", DEB_PACKAGE_NAME, repo) + package = choice(package_content) + url = deb_fixture_server.make_url(DEB_FIXTURE_STANDARD_REPOSITORY_NAME) + package_hash = hashlib.sha256(http_get(urljoin(url, package.relative_path))).hexdigest() + + # Remove content and republish + deb_modify_repository(repo, {"remove_content_units": ["*"]}) + deb_publication_factory(repo, structured=True, simple=True) + + # Download the package and check its checksum + content = download_content_unit(distribution.base_path, package.relative_path) + content_hash = hashlib.sha256(content).hexdigest() + + assert package_hash == content_hash diff --git a/pulp_deb/tests/functional/conftest.py b/pulp_deb/tests/functional/conftest.py index 04b772030..07838b46d 100644 --- a/pulp_deb/tests/functional/conftest.py +++ b/pulp_deb/tests/functional/conftest.py @@ -132,14 +132,17 @@ def apt_generic_content_api(apt_client): def deb_distribution_factory(apt_distribution_api, gen_object_with_cleanup): """Fixture that generates a deb distribution with cleanup from a given publication.""" - def _deb_distribution_factory(publication): + def _deb_distribution_factory(publication=None, repository=None): """Create a deb distribution. :param publication: The publication the distribution is based on. 
:returns: The created distribution. """ body = gen_distribution() - body["publication"] = publication.pulp_href + if publication: + body["publication"] = publication.pulp_href + if repository: + body["repository"] = repository.pulp_href return gen_object_with_cleanup(apt_distribution_api, body) return _deb_distribution_factory