Skip to content

Commit

Permalink
Add compression_type option for publish
Browse files Browse the repository at this point in the history
Add compression_type option for publishing with support for zstd.

closes #3316
  • Loading branch information
pedro-psb authored and dralley committed Dec 13, 2023
1 parent b778133 commit c59145f
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGES/3316.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a ``compression_type`` option to allow publishing metadata files with zstd compression (in addition to the default gzip).
11 changes: 11 additions & 0 deletions pulp_rpm/app/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
from types import SimpleNamespace

# Compression formats that can be selected for published metadata files.
COMPRESSION_TYPES = SimpleNamespace(ZSTD="zstd", GZ="gz")

# (value, label) pairs used as field ``choices``; the label is identical to the value.
COMPRESSION_CHOICES = tuple(
    (compression, compression)
    for compression in (COMPRESSION_TYPES.ZSTD, COMPRESSION_TYPES.GZ)
)

CHECKSUM_TYPES = SimpleNamespace(
UNKNOWN="unknown",
MD5="md5",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.7 on 2023-12-12 18:37

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Add a nullable ``compression_type`` text field to the RPM publication and
    repository models.
    """

    dependencies = [("rpm", "0058_alter_addon_repository_alter_variant_repository")]

    # Both models receive the same field definition; a separate TextField instance
    # is created for each AddField operation.
    operations = [
        migrations.AddField(
            model_name=model_name,
            name="compression_type",
            field=models.TextField(choices=[("zstd", "zstd"), ("gz", "gz")], null=True),
        )
        for model_name in ("rpmpublication", "rpmrepository")
    ]
7 changes: 6 additions & 1 deletion pulp_rpm/app/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
validate_version_paths,
)

from pulp_rpm.app.constants import CHECKSUM_CHOICES
from pulp_rpm.app.constants import CHECKSUM_CHOICES, COMPRESSION_CHOICES
from pulp_rpm.app.models import (
DistributionTree,
Package,
Expand Down Expand Up @@ -199,6 +199,8 @@ class RpmRepository(Repository, AutoAddObjPermsMixin):
package_checksum_type (String):
The name of a default checksum type to use for packages when generating metadata.
repo_config (JSON): repo configuration that will be served by distribution
compression_type(pulp_rpm.app.constants.COMPRESSION_TYPES):
Compression type to use for metadata files.
"""

TYPE = "rpm"
Expand Down Expand Up @@ -226,6 +228,7 @@ class RpmRepository(Repository, AutoAddObjPermsMixin):

autopublish = models.BooleanField(default=False)
checksum_type = models.TextField(null=True, choices=CHECKSUM_CHOICES)
compression_type = models.TextField(null=True, choices=COMPRESSION_CHOICES)
metadata_checksum_type = models.TextField(null=True, choices=CHECKSUM_CHOICES)
package_checksum_type = models.TextField(null=True, choices=CHECKSUM_CHOICES)
repo_config = models.JSONField(default=dict)
Expand All @@ -252,6 +255,7 @@ def on_new_version(self, version):
"package": self.package_checksum_type,
},
repo_config=self.repo_config,
compression_type=self.compression_type,
)

@staticmethod
Expand Down Expand Up @@ -416,6 +420,7 @@ class RpmPublication(Publication, AutoAddObjPermsMixin):

TYPE = "rpm"
checksum_type = models.TextField(choices=CHECKSUM_CHOICES)
compression_type = models.TextField(null=True, choices=COMPRESSION_CHOICES)
metadata_checksum_type = models.TextField(choices=CHECKSUM_CHOICES)
package_checksum_type = models.TextField(choices=CHECKSUM_CHOICES)
repo_config = models.JSONField(default=dict)
Expand Down
14 changes: 14 additions & 0 deletions pulp_rpm/app/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
CHECKSUM_CHOICES,
SKIP_TYPES,
SYNC_POLICY_CHOICES,
COMPRESSION_CHOICES,
)
from pulp_rpm.app.models import (
RpmDistribution,
Expand Down Expand Up @@ -89,6 +90,12 @@ class RpmRepositorySerializer(RepositorySerializer):
required=False,
allow_null=True,
)
compression_type = serializers.ChoiceField(
help_text=_("The compression type to use for metadata files."),
choices=COMPRESSION_CHOICES,
required=False,
allow_null=True,
)
gpgcheck = serializers.IntegerField(
max_value=1,
min_value=0,
Expand Down Expand Up @@ -203,6 +210,7 @@ class Meta:
"repo_gpgcheck",
"sqlite_metadata",
"repo_config",
"compression_type",
)
model = RpmRepository

Expand Down Expand Up @@ -297,6 +305,11 @@ class RpmPublicationSerializer(PublicationSerializer):
choices=CHECKSUM_CHOICES,
required=False,
)
compression_type = serializers.ChoiceField(
help_text=_("The compression type to use for metadata files."),
choices=COMPRESSION_CHOICES,
required=False,
)
gpgcheck = serializers.IntegerField(
max_value=1,
min_value=0,
Expand Down Expand Up @@ -377,6 +390,7 @@ class Meta:
"repo_gpgcheck",
"sqlite_metadata",
"repo_config",
"compression_type",
)
model = RpmPublication

Expand Down
46 changes: 30 additions & 16 deletions pulp_rpm/app/tasks/publishing.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
from collections import defaultdict
from gettext import gettext as _
import os
import logging
import os
import shutil
import tempfile
from collections import defaultdict
from gettext import gettext as _

import createrepo_c as cr
import libcomps

from django.conf import settings
from django.core.files import File
from django.db.models import Q

from pulpcore.plugin.models import (
AsciiArmoredDetachedSigningService,
ContentArtifact,
RepositoryVersion,
ProgressReport,
PublishedArtifact,
PublishedMetadata,
RepositoryContent,
RepositoryVersion,
)

from pulp_rpm.app.comps import dict_to_strdict
from pulp_rpm.app.constants import ALLOWED_CHECKSUM_ERROR_MSG, CHECKSUM_TYPES, PACKAGES_DIRECTORY
from pulp_rpm.app.constants import (
ALLOWED_CHECKSUM_ERROR_MSG,
CHECKSUM_TYPES,
COMPRESSION_TYPES,
PACKAGES_DIRECTORY,
)
from pulp_rpm.app.kickstart.treeinfo import PulpTreeInfo, TreeinfoData
from pulp_rpm.app.models import (
DistributionTree,
Expand Down Expand Up @@ -325,6 +328,7 @@ def publish(
metadata_signing_service=None,
checksum_types=None,
repo_config=None,
compression_type=COMPRESSION_TYPES.GZ,
):
"""
Create a Publication based on a RepositoryVersion.
Expand All @@ -335,6 +339,8 @@ def publish(
A reference to an associated signing service.
checksum_types (dict): Checksum types for metadata and packages.
repo_config (JSON): repo config that will be served by distribution
compression_type(pulp_rpm.app.constants.COMPRESSION_TYPES):
Compression type to use for metadata files.
"""
repository_version = RepositoryVersion.objects.get(pk=repository_version_pk)
Expand All @@ -360,7 +366,7 @@ def publish(
publication.checksum_type = checksum_type
publication.metadata_checksum_type = checksum_type
publication.package_checksum_type = checksum_types.get("package") or checksum_type

publication.compression_type = compression_type
publication.repo_config = repo_config

publication_data = PublicationData(publication)
Expand All @@ -382,6 +388,7 @@ def publish(
checksum_types,
publication_data.repomdrecords,
metadata_signing_service=metadata_signing_service,
compression_type=compression_type,
)
publish_pb.increment()

Expand All @@ -397,6 +404,7 @@ def publish(
extra_repomdrecords,
name,
metadata_signing_service=metadata_signing_service,
compression_type=compression_type,
)
publish_pb.increment()

Expand All @@ -412,6 +420,7 @@ def generate_repo_metadata(
extra_repomdrecords,
sub_folder=None,
metadata_signing_service=None,
compression_type=COMPRESSION_TYPES.GZ,
):
"""
Creates a repomd.xml file.
Expand All @@ -423,6 +432,8 @@ def generate_repo_metadata(
sub_folder(str): name of the folder for sub repos
metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
A reference to an associated signing service.
compression_type(pulp_rpm.app.constants.COMPRESSION_TYPES):
Compression type to use for metadata files.
"""
cwd = os.getcwd()
Expand All @@ -442,17 +453,20 @@ def generate_repo_metadata(
)

# Prepare metadata files
compression_extension = ".zst" if compression_type == COMPRESSION_TYPES.ZSTD else ".gz"
cr_compression_type = cr.ZSTD if compression_type == COMPRESSION_TYPES.ZSTD else cr.GZ

repomd_path = os.path.join(cwd, "repomd.xml")
pri_xml_path = os.path.join(cwd, "primary.xml.gz")
fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
oth_xml_path = os.path.join(cwd, "other.xml.gz")
upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
pri_xml_path = os.path.join(cwd, "primary.xml") + compression_extension
fil_xml_path = os.path.join(cwd, "filelists.xml") + compression_extension
oth_xml_path = os.path.join(cwd, "other.xml") + compression_extension
upd_xml_path = os.path.join(cwd, "updateinfo.xml") + compression_extension
mod_yml_path = os.path.join(cwd, "modules.yaml")
comps_xml_path = os.path.join(cwd, "comps.xml")

pri_xml = cr.PrimaryXmlFile(pri_xml_path, compressiontype=cr.GZ)
fil_xml = cr.FilelistsXmlFile(fil_xml_path, compressiontype=cr.GZ)
oth_xml = cr.OtherXmlFile(oth_xml_path, compressiontype=cr.GZ)
pri_xml = cr.PrimaryXmlFile(pri_xml_path, compressiontype=cr_compression_type)
fil_xml = cr.FilelistsXmlFile(fil_xml_path, compressiontype=cr_compression_type)
oth_xml = cr.OtherXmlFile(oth_xml_path, compressiontype=cr_compression_type)
upd_xml = None

# We want to support publishing with a different checksum type than the one built-in to the
Expand Down Expand Up @@ -567,7 +581,7 @@ def generate_repo_metadata(
update_records = UpdateRecord.objects.filter(pk__in=content).order_by("id", "digest")
for update_record in update_records.iterator():
if not upd_xml:
upd_xml = cr.UpdateInfoXmlFile(upd_xml_path, compressiontype=cr.GZ)
upd_xml = cr.UpdateInfoXmlFile(upd_xml_path, compressiontype=cr_compression_type)
upd_xml.add_chunk(cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

# Process modulemd, modulemd_defaults and obsoletes
Expand Down
2 changes: 2 additions & 0 deletions pulp_rpm/app/viewsets/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ def create(self, request):
)
repo_config = serializer.validated_data.get("repo_config", repository.repo_config)
repo_config = gpgcheck_options if gpgcheck_options else repo_config
compression_type = serializer.validated_data.get("compression_type")

if repository.metadata_signing_service:
signing_service_pk = repository.metadata_signing_service.pk
Expand All @@ -578,6 +579,7 @@ def create(self, request):
"metadata_signing_service": signing_service_pk,
"checksum_types": checksum_types,
"repo_config": repo_config,
"compression_type": compression_type,
},
)
return OperationPostponedResponse(result, request)
Expand Down
62 changes: 62 additions & 0 deletions pulp_rpm/tests/functional/api/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,35 @@ def test_publish_any_repo_version(
}
rpm_publication_api.create(body)

@pytest.mark.parametrize("compression_type,compression_ext", (("gz", ".gz"), ("zstd", ".zst")))
@pytest.mark.parallel
def test_publish_with_compression_types(
    self,
    compression_type,
    compression_ext,
    rpm_unsigned_repo_immediate,
    rpm_publication_api,
    gen_object_with_cleanup,
    rpm_distribution_api,
    monitor_task,
):
    """Publish a repository with each compression type and check the metadata file extensions.

    Runs once per parametrized ``(compression_type, compression_ext)`` pair: creates a
    publication with the requested compression type, distributes it, and verifies that
    the compressed metadata files listed in repomd.xml end with the expected extension.
    """
    # 1. Publish and distribute
    publish_data = RpmRpmPublication(
        repository=rpm_unsigned_repo_immediate.pulp_href, compression_type=compression_type
    )
    publish_response = rpm_publication_api.create(publish_data)
    created_resources = monitor_task(publish_response.task).created_resources
    publication_href = created_resources[0]

    body = gen_distribution(publication=publication_href)
    distribution = gen_object_with_cleanup(rpm_distribution_api, body)

    # 2. Check "primary", "filelists", "other", "updateinfo" have correct compression ext
    md_hrefs = self.get_repomd_metadata_urls(distribution.base_url)

    # Guard against a vacuous pass: if repomd.xml listed none of the expected types,
    # the extension checks below would never run.
    # NOTE(review): assumes every publication lists primary/filelists/other in repomd.xml;
    # updateinfo is only checked when present.
    for md_type in ("primary", "filelists", "other"):
        assert md_type in md_hrefs, "'{}' is missing from repomd.xml".format(md_type)

    for md_type in ("primary", "filelists", "other", "updateinfo"):
        md_href = md_hrefs.get(md_type)
        if md_href is not None:
            assert md_href.endswith(compression_ext), "{} -> {} does not end with {}".format(
                md_type, md_href, compression_ext
            )

@pytest.mark.parallel
def test_validate_no_checksum_tag(
self,
Expand Down Expand Up @@ -127,6 +156,39 @@ def _get_updateinfo_xml_path(root_elem):
xpath = "{{{}}}location".format(RPM_NAMESPACES["metadata/repo"])
return data_elems[0].find(xpath).get("href")

@staticmethod
def get_repomd_metadata_urls(repomd_url: str):
    """
    Helper function to get metadata types and their respective hrefs from repomd.xml.

    Args:
        repomd_url (str): Base URL of the published repository; "repodata/repomd.xml"
            is appended to it to locate the repomd file.

    Returns:
        dict: Maps the ``type`` attribute of each ``data`` element in repomd.xml to the
            ``href`` of its ``location`` child.

    Raises:
        requests.HTTPError: If the server answers the repomd.xml request with an
            error status.

    Example:
    ```
    >>> get_repomd_metadata_urls(distribution.base_url)
    {
        "primary": "repodata/.../primary.xml.gz",
        "filelists": "repodata/.../filelists.xml.gz",
        ...
    }
    ```
    """
    # XML Reference:
    #   <ns0:repomd xmlns:ns0="http://linux.duke.edu/metadata/repo">
    #     <ns0:data type="primary">
    #       <ns0:checksum type="sha256">[…]</ns0:checksum>
    #       <ns0:location href="repodata/[…]-primary.xml.gz" />
    #       …
    #     </ns0:data>
    #     …

    # URLs always use "/", so join with string handling instead of os.path.join,
    # whose separator depends on the operating system.
    response = requests.get(repomd_url.rstrip("/") + "/repodata/repomd.xml")
    # Fail on HTTP errors here instead of handing an error page to the XML parser.
    response.raise_for_status()

    repomd = ElementTree.fromstring(response.text)
    xpath_data = "{{{}}}data".format(RPM_NAMESPACES["metadata/repo"])
    xpath_location = "{{{}}}location".format(RPM_NAMESPACES["metadata/repo"])
    return {
        elem.get("type"): elem.find(xpath_location).get("href")
        for elem in repomd.findall(xpath_data)
    }


@pytest.fixture(scope="class")
def assert_created_publication(init_and_sync, rpm_publication_api, monitor_task):
Expand Down

0 comments on commit c59145f

Please sign in to comment.