Serve previously published artifacts for 3 days
fixes #911
daviddavis committed Jan 26, 2024
1 parent 84725f4 commit 868a344
Showing 5 changed files with 187 additions and 3 deletions.
3 changes: 3 additions & 0 deletions CHANGES/911.feature
@@ -0,0 +1,3 @@
Added a feature to serve published artifacts from previous publications for 3 days.
This fulfills the apt-by-hash/acquire-by-hash spec by allowing by-hash files to be cached for a
period of 3 days.
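The client-visible effect, as a hedged sketch (the host, base_path, and digest below are placeholders, not part of this commit): a by-hash index published before a republish should keep resolving for up to 3 days afterward.

```python
# Illustrative sketch only; host, base_path, and digest are placeholders.
import requests

BASE = "http://pulp.example.com/pulp/content/my-deb"  # hypothetical distribution base_path
# by-hash path of a Packages index from a *previous* publication (placeholder digest)
stale_index = "dists/stable/main/binary-amd64/by-hash/SHA256/0123abcd"

response = requests.get(f"{BASE}/{stale_index}")
# Within 3 days (PUBLICATION_CACHE_DURATION) of a republish, the stale by-hash
# path should still be served from the previous publication instead of 404ing.
assert response.status_code == 200
```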
50 changes: 50 additions & 0 deletions pulp_deb/app/migrations/0029_distributedpublication.py
@@ -0,0 +1,50 @@
# Generated by Django 4.2.2 on 2024-01-03 18:58

from django.db import migrations, models
import django.db.models.deletion
import django_lifecycle.mixins
import pulpcore.app.models.base


class Migration(migrations.Migration):

dependencies = [
("core", "0114_remove_task_args_remove_task_kwargs"),
("deb", "0028_sourcepackage_sourcepackagereleasecomponent_and_more"),
]

operations = [
migrations.CreateModel(
name="DistributedPublication",
fields=[
(
"pulp_id",
models.UUIDField(
default=pulpcore.app.models.base.pulp_uuid,
editable=False,
primary_key=True,
serialize=False,
),
),
("pulp_created", models.DateTimeField(auto_now_add=True)),
("pulp_last_updated", models.DateTimeField(auto_now=True, null=True)),
("expires_at", models.DateTimeField(null=True)),
(
"distribution",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="core.distribution"
),
),
(
"publication",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="core.publication"
),
),
],
options={
"abstract": False,
},
bases=(django_lifecycle.mixins.LifecycleModelMixin, models.Model),
),
]
96 changes: 95 additions & 1 deletion pulp_deb/app/models/publication.py
@@ -1,11 +1,40 @@
from contextlib import suppress
from datetime import timedelta

from django.db import models
from django.utils import timezone
from django_lifecycle import hook, AFTER_CREATE, AFTER_UPDATE

from pulpcore.plugin.models import Publication, Distribution
from pulpcore.plugin.models import (
BaseModel,
Distribution,
Publication,
PublishedArtifact,
RepositoryVersion,
)

from pulp_deb.app.models.signing_service import AptReleaseSigningService


BOOL_CHOICES = [(True, "yes"), (False, "no")]
PUBLICATION_CACHE_DURATION = timedelta(days=3)


def latest_publication(repo_pk):
"""
Find the latest publication for a repository.
This function is based on the logic in pulpcore's content handler.
https://github.com/pulp/pulpcore/blob/3bfd35c76e29944b622d275be52c0d5ebbdfbf72/pulpcore/content/handler.py#L601-L607
"""
versions = RepositoryVersion.objects.filter(repository=repo_pk)
with suppress(Publication.DoesNotExist):
return (
Publication.objects.filter(repository_version__in=versions, complete=True)
.latest("repository_version", "pulp_created")
.cast()
)


class VerbatimPublication(Publication):
@@ -17,6 +46,12 @@ class VerbatimPublication(Publication):

TYPE = "verbatim-publication"

@hook(AFTER_UPDATE, when="complete", has_changed=True, is_now=True)
def set_distributed_publication(self):
for distro in AptDistribution.objects.filter(repository__pk=self.repository.pk):
if self == latest_publication(self.repository.pk):
DistributedPublication(distribution=distro, publication=self).save()

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"

@@ -36,6 +71,12 @@ class AptPublication(Publication):
AptReleaseSigningService, on_delete=models.PROTECT, null=True
)

@hook(AFTER_UPDATE, when="complete", has_changed=True, is_now=True)
def set_distributed_publication(self):
for distro in AptDistribution.objects.filter(repository__pk=self.repository.pk):
if self == latest_publication(self.repository.pk):
DistributedPublication(distribution=distro, publication=self).save()

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"

@@ -48,5 +89,58 @@ class AptDistribution(Distribution):
TYPE = "apt-distribution"
SERVE_FROM_PUBLICATION = True

@hook(AFTER_CREATE)
@hook(AFTER_UPDATE, when="publication", has_changed=True, is_not=None)
@hook(AFTER_UPDATE, when="repository", has_changed=True, is_not=None)
def set_distributed_publication(self):
if self.publication:
            DistributedPublication(distribution=self, publication=self.publication).save()
elif self.repository:
            if publication := latest_publication(self.repository.pk):
DistributedPublication(distribution=self, publication=publication).save()

    def content_handler(self, path):
        # Publications that are still current (expires_at is NULL) or within their
        # 3-day grace period (expires_at in the future) are eligible to serve content.
        recent_dp = self.distributedpublication_set.filter(
            models.Q(expires_at__gte=timezone.now()) | models.Q(expires_at__isnull=True)
        ).order_by("pulp_created")
        # Prefer the artifact from the most recently distributed publication.
        pa = (
PublishedArtifact.objects.filter(
relative_path=path, publication__distributedpublication__pk__in=recent_dp
)
.order_by("-publication__distributedpublication__pulp_created")
.select_related(
"content_artifact",
"content_artifact__artifact",
)
).first()

if pa:
return pa.content_artifact
return

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"


class DistributedPublication(BaseModel):
"""
Represents a history of distributed publications.
This allows the content handler to serve a previous Publication's content for a set period of
time.
When a new Publication is served by a Distribution, it creates a new DistributionPublication and
sets the expires_at field on any existing DistributionPublications.
"""

distribution = models.ForeignKey(Distribution, on_delete=models.CASCADE)
publication = models.ForeignKey(Publication, on_delete=models.CASCADE)
expires_at = models.DateTimeField(null=True)

@hook(AFTER_CREATE)
def cleanup(self):
"""Set expires_at on any older DistributedPublication and cleanup any expired ones."""
DistributedPublication.objects.filter(expires_at__lt=timezone.now()).delete()
DistributedPublication.objects.exclude(pk=self.pk).filter(
distribution=self.distribution, expires_at__isnull=True
).update(expires_at=(timezone.now() + PUBLICATION_CACHE_DURATION))
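
The cleanup hook encodes a simple lifecycle: every record that was current when a new publication arrives picks up a 3-day grace period, and anything already past its grace period is dropped. A minimal, ORM-free sketch of that rule for a single distribution's records (the Record class and on_new_publication() are illustrative names, not part of the commit):

```python
# ORM-free sketch of the lifecycle in DistributedPublication.cleanup(); the
# Record class and on_new_publication() are illustrative names only.
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Optional

CACHE_DURATION = timedelta(days=3)  # mirrors PUBLICATION_CACHE_DURATION


@dataclass
class Record:
    publication: str
    expires_at: Optional[datetime] = None  # None means "currently distributed"


def on_new_publication(records: list[Record], new: str, now: datetime) -> list[Record]:
    # Drop records whose grace period has already lapsed.
    records = [r for r in records if r.expires_at is None or r.expires_at >= now]
    # Start the 3-day clock on every record that was still current.
    for r in records:
        if r.expires_at is None:
            r.expires_at = now + CACHE_DURATION
    # The new publication becomes the current (non-expiring) record.
    return records + [Record(publication=new)]
```

Called once per publish, the previously current record picks up an expires_at three days out, which is what the hook's update(...) call does for the distribution's other records.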
7 changes: 5 additions & 2 deletions pulp_deb/tests/conftest.py
@@ -59,14 +59,17 @@ def apt_repository_versions_api(apt_client):
def deb_distribution_factory(apt_distribution_api, gen_object_with_cleanup):
"""Fixture that generates a deb distribution with cleanup from a given publication."""

def _deb_distribution_factory(publication):
def _deb_distribution_factory(publication=None, repository=None):
"""Create a deb distribution.
:param publication: The publication the distribution is based on.
:returns: The created distribution.
"""
body = gen_distribution()
body["publication"] = publication.pulp_href
if publication:
body["publication"] = publication.pulp_href
if repository:
body["repository"] = repository.pulp_href
return gen_object_with_cleanup(apt_distribution_api, body)

return _deb_distribution_factory
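
With this change the fixture accepts either argument; hypothetical usage in a test (fixture and variable names are illustrative):

```python
# Hypothetical usage; fixture and variable names are illustrative.
distribution = deb_distribution_factory(publication=publication)  # pin to one publication
distribution = deb_distribution_factory(repository=repo)  # track the repo's latest publication
```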
34 changes: 34 additions & 0 deletions pulp_deb/tests/functional/api/test_download_content.py
@@ -169,3 +169,37 @@ def test_download_content(
content = download_content_unit(distribution.base_path, unit_path[1])
pulp_hashes.append(hashlib.sha256(content).hexdigest())
assert fixtures_hashes == pulp_hashes


@pytest.mark.parallel
def test_download_cached_content(
deb_init_and_sync,
deb_distribution_factory,
deb_publication_factory,
deb_fixture_server,
download_content_unit,
http_get,
deb_get_content_types,
deb_modify_repository,
):
"""Verify that previously published content can still be downloaded."""
# Create/sync a repo and then a distro
repo, _ = deb_init_and_sync()
distribution = deb_distribution_factory(repository=repo)
deb_publication_factory(repo, structured=True, simple=True)

# Find a random package and get its hash digest
package_content = deb_get_content_types("apt_package_api", DEB_PACKAGE_NAME, repo)
package = choice(package_content)
url = deb_fixture_server.make_url(DEB_FIXTURE_STANDARD_REPOSITORY_NAME)
package_hash = hashlib.sha256(http_get(urljoin(url, package.relative_path))).hexdigest()

# Remove content and republish
deb_modify_repository(repo, {"remove_content_units": ["*"]})
deb_publication_factory(repo, structured=True, simple=True)

# Download the package and check its checksum
content = download_content_unit(distribution.base_path, package.relative_path)
content_hash = hashlib.sha256(content).hexdigest()

assert package_hash == content_hash
