Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prune of unused artifacts links via client #2192

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/zenml/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2786,6 +2786,43 @@ def delete_artifact(
self.zen_store.delete_artifact(artifact_id=artifact.id)
logger.info(f"Deleted artifact '{artifact.name}'.")

def prune_artifacts(
self, only_artifact: bool = False, only_metadata: bool = False
) -> None:
"""Delete all unused artifacts and artifact versions.

Args:
only_artifact: Only delete artifacts
only_metadata: Only delete artifact metadata

Raises:
RuntimeError: If any artifact cannot be deleted.
"""
unused_artifact_versions = depaginate(
partial(self.list_artifact_versions, only_unused=True)
)

if not unused_artifact_versions:
logger.info("No unused artifact versions found.")
return

for unused_artifact_version in unused_artifact_versions:
try:
self.delete_artifact_version(
name_id_or_prefix=unused_artifact_version.id,
delete_metadata=not only_artifact,
delete_from_artifact_store=not only_metadata,
)
unused_artifact = unused_artifact_version.artifact
if not unused_artifact.versions and not only_artifact:
self.delete_artifact(unused_artifact.id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the PR where you implemented the "delete all model version artifacts", there is a performance hit with this type of loop that ends up calling the REST API multiple times. You should consider implementing this as a REST API endpoint and looping in the SQL zen store instead (perhaps even deleting entities en-masse through a single SQL query, if possible). This opens the possibility of using this functionality through the dashboard.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey, I'm half way through implementing this change, but stuck at the verify_permissions_and_delete_entity. It was rather clear what to check on single entry deletion, but here I prune Artifact, Artifact Version and Data at scale. Any thoughts? Shall I check for Admin permissions to do so?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only have a half-baked idea about how to get around this: maybe we need a new prune permission that you check globally at server level, as opposed to checking against every resource. But that requires more work, not only here, but probably also at Cloud API level.

@schustmi what do you think ?


except Exception as e:
strickvl marked this conversation as resolved.
Show resolved Hide resolved
raise RuntimeError(
f"Failed to prune artifacts, error deleting artifact `{unused_artifact_version.id}`, error `{e}`."
)
logger.info("All unused artifacts and artifact versions deleted.")

# --------------------------- Artifact Versions ---------------------------

def get_artifact_version(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""endpoint_artifact>deployment_artifact [4e1972485075].

Revision ID: 4e1972485075
Revises: 0.52.0
Revises: 5cc3f41cf048
Create Date: 2023-12-12 10:51:44.177810

"""
Expand All @@ -10,7 +10,7 @@

# revision identifiers, used by Alembic.
revision = "4e1972485075"
down_revision = "0.52.0"
down_revision = "5cc3f41cf048"
branch_labels = None
depends_on = None

Expand Down
32 changes: 32 additions & 0 deletions tests/integration/functional/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
int_plus_one_test_step,
)
from tests.integration.functional.utils import sample_name
from zenml import ExternalArtifact
from zenml.client import Client
from zenml.config.pipeline_spec import PipelineSpec
from zenml.config.source import Source
Expand Down Expand Up @@ -1128,6 +1129,37 @@ def test_basic_crud_for_entity(
pass


class TestArtifact:
def test_prune(self, clean_client: "Client"):
avishniakov marked this conversation as resolved.
Show resolved Hide resolved
"""Test that artifact pruning works."""
artifact_id = ExternalArtifact(value="foo").upload_by_value()
artifact = clean_client.get_artifact_version(artifact_id)
assert artifact is not None
clean_client.prune_artifacts()
with pytest.raises(KeyError):
clean_client.get_artifact_version(artifact_id)
avishniakov marked this conversation as resolved.
Show resolved Hide resolved
assert not os.path.exists(artifact.uri)

def test_prune_only_metadata(self, clean_client: "Client"):
"""Test that artifact pruning works with only metadata flag."""
artifact_id = ExternalArtifact(value="foo").upload_by_value()
artifact = clean_client.get_artifact_version(artifact_id)
assert artifact is not None
clean_client.prune_artifacts(only_metadata=True)
with pytest.raises(KeyError):
clean_client.get_artifact_version(artifact_id)
assert os.path.exists(artifact.uri)

def test_prune_only_artifact(self, clean_client: "Client"):
"""Test that artifact pruning works with only artifacts flag."""
artifact_id = ExternalArtifact(value="foo").upload_by_value()
artifact = clean_client.get_artifact_version(artifact_id)
assert artifact is not None
clean_client.prune_artifacts(only_artifact=True)
assert clean_client.get_artifact_version(artifact_id).id == artifact.id
assert not os.path.exists(artifact.uri)


class TestModel:
MODEL_NAME = "foo"

Expand Down
Loading