forked from pulp/pulp_rpm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added /prune/ endpoint for removing "old" RPMs from a Repository.
closes pulp#2909.
- Loading branch information
Showing
11 changed files
with
351 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Added /rpm/prune command to allow "pruning" old Packages from repositories. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from gettext import gettext as _ | ||
|
||
from rest_framework import fields, serializers | ||
|
||
from pulp_rpm.app.models import RpmRepository | ||
|
||
from pulpcore.plugin.serializers import ValidateFieldsMixin | ||
from pulpcore.plugin.util import get_domain | ||
|
||
|
||
class PruneNEVRAsSerializer(serializers.Serializer, ValidateFieldsMixin):
    """
    Serializer for prune-old-NEVRAs operation.

    Validates the request body: which repositories to prune, how many days of history to
    keep, how many repositories may be pruned concurrently, and whether to do a dry run.
    """

    # child=CharField makes DRF validate each entry as a string before validate_repo_hrefs()
    # runs, so nested lists/dicts are rejected with a 400 instead of reaching href resolution.
    repo_hrefs = fields.ListField(
        required=True,
        child=serializers.CharField(),
        help_text=_(
            "Will prune old RPMs from the specified list of repos. Use ['*'] to specify all repos."
        ),
    )

    # min_value=0: a negative number of days would place the cutoff in the future.
    keep_days = serializers.IntegerField(
        help_text=_(
            "Prune NEVRAs introduced *prior-to* this many days ago. "
            "Default is 14. A value of 0 implies 'keep latest NEVRA only.'"
        ),
        required=False,
        min_value=0,
        default=14,
    )

    # allow_null is kept for API compatibility; an explicit null therefore validates to None.
    repo_concurrency = serializers.IntegerField(
        help_text=_(
            "Number of concurrent workers to use to do the pruning. "
            "If not set then the default value will be used."
        ),
        allow_null=True,
        required=False,
        min_value=1,
        default=10,
    )

    dry_run = serializers.BooleanField(
        help_text=_(
            "Determine what would-be-pruned and log the list of NEVRAs. "
            "Intended as a debugging aid."
        ),
        default=False,
        required=False,
    )

    def validate_repo_hrefs(self, value):
        """
        Check that repo_hrefs is not an empty list and contains either valid hrefs or "*".

        Args:
            value (list): The list supplied by the user

        Returns:
            The RpmRepositories to prune: a queryset of every RpmRepository in the current
            domain when ["*"] was given, otherwise a list with one resolved RpmRepository
            per href.

        Raises:
            ValidationError: If the list is empty, or mixes "*" with specific hrefs.
                Invalid hrefs are reported by NamedModelViewSet.get_resource()
                (presumably also as a ValidationError - confirm against pulpcore).
        """
        if not value:
            raise serializers.ValidationError(_("Must not be []."))

        # prune-all-repos is "*" - find all repos in this domain
        if "*" in value:
            if len(value) != 1:
                raise serializers.ValidationError(
                    _("Can't specify specific HREFs when using '*'")
                )
            return RpmRepository.objects.filter(pulp_domain=get_domain())

        # NOTE(review): imported at call time as in the original - presumably to avoid a
        # circular import at module load; confirm before hoisting to the top of the file.
        from pulpcore.app.viewsets import NamedModelViewSet

        # We're pruning a specific list of RPM repositories.
        # Validate that they are for RpmRepositories.
        return [NamedModelViewSet.get_resource(href, RpmRepository) for href in value]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
from datetime import datetime, timedelta | ||
from gettext import gettext as _ | ||
from logging import getLogger, DEBUG | ||
|
||
from django.db.models import F, Max, Subquery | ||
from django.utils import timezone | ||
|
||
from pulpcore.app.models import ProgressReport | ||
from pulpcore.constants import TASK_STATES | ||
from pulpcore.plugin.models import ( | ||
GroupProgressReport, | ||
RepositoryContent, | ||
TaskGroup, | ||
) | ||
from pulpcore.plugin.tasking import dispatch | ||
from pulp_rpm.app.models.package import Package | ||
from pulp_rpm.app.models.repository import RpmRepository | ||
|
||
log = getLogger(__name__) | ||
|
||
|
||
def prune_repo_nevras(repo_pk, keep_days, dry_run):
    """
    This task prunes old NEVRAs from the latest_version of the specified repository.

    Records a ProgressReport for the repository and increments the "rpm.nevra.prune"
    group progress report of the current TaskGroup by one.

    Args:
        repo_pk (UUID): UUID of the RpmRepository to be pruned.
        keep_days(int): Keep RepositoryContent created less than this many days ago.
        dry_run (boolean): If True, don't actually do the prune, just log to-be-pruned NEVRAs.
    """
    repo = RpmRepository.objects.get(pk=repo_pk)
    curr_vers = repo.latest_version()
    # timezone.now() is Django's supported way to get "now"; the django.utils.timezone.utc
    # alias the original relied on has been removed from newer Django releases.
    eldest_datetime = timezone.now() - timedelta(days=keep_days)
    # The debug output below issues extra COUNT queries; only pay for them when debugging.
    debugging = log.isEnabledFor(DEBUG)

    log.info(_("PRUNING REPOSITORY {}.").format(repo.name))
    if debugging:
        log.debug(f">>> TOTAL RPMS: {curr_vers.get_content(Package.objects).count()}")

    # We only care about RPM-names that have more than one EVRA - "singles" are always kept.
    # NOTE(review): with_age() is defined on the Package manager outside this view;
    # presumably age == 1 marks the newest EVR for a name/arch - confirm.
    rpm_by_name_age = (
        curr_vers.get_content(Package.objects.with_age())
        .filter(age__gt=1)
        .order_by("name", "epoch", "version", "release", "arch")
        .values("pk")
    )
    if debugging:
        multi_evr_count = (
            curr_vers.get_content(Package.objects)
            .filter(pk__in=rpm_by_name_age)
            .values("name", "arch")
            .distinct()
            .count()
        )
        log.debug(">>> # NAME/ARCH w/ MULTIPLE EVRs: {}".format(multi_evr_count))
        unique_names_count = (
            curr_vers.get_content(Package.objects).values("name", "arch").distinct().count()
        )
        log.debug(">>> # UNIQUE NAMES: {}".format(unique_names_count))

    # Find the RepositoryContents associated with the packages from above whose *most recent
    # add* to THIS repository is older than eldest_datetime.
    #
    # Content can be added, removed, and re-added any number of times to the same repository,
    # and can live in other repositories too. The row that represents the most recent add to
    # this repository is the one that belongs to this repository and has not been removed
    # (version_removed is NULL), so restricting to those rows and comparing their
    # pulp_created is sufficient. Without the repository/version_removed restriction, any
    # old add-record (an earlier add that was since removed, or an add to a different
    # repository) would mark the package for pruning.
    target_ids_q = RepositoryContent.objects.filter(
        content__in=Subquery(rpm_by_name_age),
        repository=repo,
        version_removed=None,
        pulp_created__lt=eldest_datetime,
    ).values("content_id")

    # Count once and reuse for both the debug line and the progress report.
    to_be_removed = target_ids_q.count()
    log.debug(f">>> TARGET IDS: {to_be_removed}.")

    # Use the progressreport to report back numbers. The prune happens as one
    # action.
    data = dict(
        message=f"Pruning {repo.name}",
        code="rpm.nevra.prune.repository",
        total=to_be_removed,
        state=TASK_STATES.COMPLETED,
        done=0,
    )

    if dry_run:
        if debugging:  # Don't go through the loop unless debugging
            for p in Package.objects.filter(pk__in=target_ids_q).order_by(
                "name", "epoch", "version", "release", "arch"
            ):
                log.debug(f"Package {p.nevra} would be removed.")
    else:
        with repo.new_version(base_version=None) as new_version:
            new_version.remove_content(target_ids_q)
        data["done"] = to_be_removed

    pb = ProgressReport(**data)
    pb.save()

    # Report back that this repo has completed.
    gpr = TaskGroup.current().group_progress_reports.filter(code="rpm.nevra.prune")
    gpr.update(done=F("done") + 1)
|
||
|
||
def prune_nevras(
    repo_pks,
    keep_days=14,
    repo_concurrency=10,
    dry_run=False,
):
    """
    This task prunes old NEVRAs from the latest_version of the specified list of repos.

    "Old" in this context is defined by the RepositoryContent record that added a NEVRA
    to the repository in question.

    It will issue one task-per-repository.

    Kwargs:
        repo_pks (list): A list of pks of the repositories to be pruned.
        keep_days(int): Keep RepositoryContent created less than this many days ago.
        repo_concurrency (int): number of repos to prune at a time. None or a value
            below 1 falls back to 10.
        dry_run (boolean): If True, don't actually do the prune, just record to-be-pruned NEVRAs.
    """
    # Callers may hand us None (the request serializer accepts an explicit null), which
    # would make the modulo below raise TypeError; 0 would raise ZeroDivisionError.
    if repo_concurrency is None or repo_concurrency < 1:
        repo_concurrency = 10

    # Evaluate once so the reported total matches the number of tasks actually dispatched,
    # even if some of the requested pks no longer resolve to a repository.
    repos_to_prune = list(RpmRepository.objects.filter(pk__in=repo_pks))
    task_group = TaskGroup.current()

    gpr = GroupProgressReport(
        message="Pruning old NEVRAs",
        code="rpm.nevra.prune",
        total=len(repos_to_prune),
        done=0,
        task_group=task_group,
    )
    gpr.save()

    # Dispatch a task-per-repository.
    # Lock on the repository *and* on one of repo_concurrency shared "worker" resources, to
    # ensure the max-concurrency specified.
    # This will keep an "all repositories" prune from locking up all the workers
    # until all repositories are completed.
    for index, a_repo in enumerate(repos_to_prune):
        worker_rsrc = f"rpm-prune-worker-{index % repo_concurrency}"
        exclusive_resources = [worker_rsrc, a_repo]

        dispatch(
            prune_repo_nevras,
            exclusive_resources=exclusive_resources,
            args=(
                a_repo.pk,
                keep_days,
                dry_run,
            ),
            task_group=task_group,
        )

    # Called once, after every per-repository task has been dispatched.
    task_group.finish()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from drf_spectacular.utils import extend_schema | ||
from django.conf import settings | ||
from rest_framework.viewsets import ViewSet | ||
|
||
from pulpcore.plugin.viewsets import TaskGroupOperationResponse | ||
from pulpcore.plugin.models import TaskGroup | ||
from pulpcore.plugin.serializers import TaskGroupOperationResponseSerializer | ||
from pulp_rpm.app.serializers import PruneNEVRAsSerializer | ||
from pulp_rpm.app.tasks import prune_nevras | ||
from pulpcore.plugin.tasking import dispatch | ||
|
||
|
||
class PruneNEVRAsViewSet(ViewSet):
    """
    Viewset for prune-old-NEVRAs endpoint.
    """

    serializer_class = PruneNEVRAsSerializer

    DEFAULT_ACCESS_POLICY = {
        "statements": [
            {
                "action": ["prune_nevras"],
                "principal": "authenticated",
                "effect": "allow",
                "condition": [
                    "has_model_or_domain_or_obj_perms:rpm.prune_rpmrepository",
                ],
            },
        ],
    }

    @extend_schema(
        description="Trigger an asynchronous old-NEVRA-prune operation.",
        responses={202: TaskGroupOperationResponseSerializer},
    )
    def prune_nevras(self, request):
        """
        Validate the request and dispatch the asynchronous old-NEVRA-prune task.

        The response describes a task-group. The task dispatched here is handed the pks of
        the validated repositories together with the keep_days, repo_concurrency and dry_run
        options from the request.
        """
        request_serializer = PruneNEVRAsSerializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        options = request_serializer.validated_data

        # The serializer has already resolved hrefs (or "*") into repository objects.
        repo_pks = [repo.pk for repo in options.get("repo_hrefs", [])]

        # Resources the dispatched task holds exclusively: the endpoint URI (prefixed with
        # the domain name when domains are enabled) and a per-domain prune identifier.
        base_uri = "/api/v3/rpm/prune/"
        uri = f"/{request.pulp_domain.name}{base_uri}" if settings.DOMAIN_ENABLED else base_uri
        locks = [uri, f"pdrn:{request.pulp_domain.pulp_id}:rpm:prune"]

        group = TaskGroup.objects.create(description="Prune old NEVRAs.")

        # Inside this method `prune_nevras` resolves to the task function imported at
        # module level, not to this method.
        task_kwargs = {
            "repo_pks": repo_pks,
            "keep_days": options["keep_days"],
            "repo_concurrency": options["repo_concurrency"],
            "dry_run": options["dry_run"],
        }
        dispatch(
            prune_nevras,
            exclusive_resources=locks,
            task_group=group,
            kwargs=task_kwargs,
        )
        return TaskGroupOperationResponse(group, request)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.