From beb47e2cabbafc7b97ac2f402bccef45f556800a Mon Sep 17 00:00:00 2001
From: Mikko Nieminen
Date: Wed, 28 Jun 2023 14:41:57 +0200
Subject: [PATCH] add irods trash data statistics (#1658)

---
 CHANGELOG.rst                               |  4 ++
 irodsbackend/api.py                         |  7 +++
 irodsbackend/plugins.py                     |  9 ++-
 irodsbackend/tests/test_api.py              |  6 ++
 irodsbackend/tests/test_plugins_taskflow.py | 70 +++++++++++++++++++++
 taskflowbackend/api.py                      | 26 +++++++-
 6 files changed, 119 insertions(+), 3 deletions(-)
 create mode 100644 irodsbackend/tests/test_plugins_taskflow.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 7ac46477..497defec 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,6 +11,9 @@ Unreleased
 Added
 -----
 
+- **Irodsbackend**
+  - ``get_trash_path()`` helper (#1658)
+  - iRODS trash statistics for siteinfo (#1658)
 - **Landingzones**
   - Landing zone updating (#1267)
 - **Samplesheets**
@@ -26,6 +29,7 @@ Added
 - **Taskflowbackend**
   - ``BatchCalculateChecksumTask`` iRODS task (#1634)
   - Automated generation of missing checksums in ``zone_move`` flow (#1634)
+  - Cleanup of trash collections in testing (#1658)
 
 Changed
 -------
diff --git a/irodsbackend/api.py b/irodsbackend/api.py
index 52a34bf0..c99febf4 100644
--- a/irodsbackend/api.py
+++ b/irodsbackend/api.py
@@ -2,6 +2,7 @@
 
 import logging
 import math
+import os
 import random
 import re
 import string
@@ -54,6 +55,7 @@
     'irods_port',
 ]
 USER_GROUP_PREFIX = 'omics_project_'
+TRASH_COLL_NAME = 'trash'
 PATH_PARENT_SUBSTRING = '/..'
 ERROR_PATH_PARENT = 'Use of parent not allowed in path'
 ERROR_PATH_UNSET = 'Path is not set'
@@ -350,6 +352,11 @@ def get_projects_path(cls):
         """Return the SODAR projects collection path"""
         return cls.get_root_path() + '/projects'
 
+    @classmethod
+    def get_trash_path(cls):
+        """Return the trash path in the current zone"""
+        return '/' + os.path.join(settings.IRODS_ZONE, TRASH_COLL_NAME)
+
     @classmethod
     def get_uuid_from_path(cls, path, obj_type):
         """
diff --git a/irodsbackend/plugins.py b/irodsbackend/plugins.py
index 742fe2a2..a9e57f68 100644
--- a/irodsbackend/plugins.py
+++ b/irodsbackend/plugins.py
@@ -78,6 +78,9 @@ def get_statistics(self):
                 project_stats = irods_backend.get_object_stats(
                     irods, irods_backend.get_projects_path()
                 )
+                trash_stats = irods_backend.get_object_stats(
+                    irods, irods_backend.get_trash_path()
+                )
         except Exception:
             return {}
         return {
@@ -86,5 +89,9 @@ def get_statistics(self):
                 'value': filesizeformat(project_stats['total_size']),
                 'description': 'Total file size including sample repositories '
                 'and landing zones.',
-            }
+            },
+            'irods_trash_size': {
+                'label': 'Data in iRODS Trash',
+                'value': filesizeformat(trash_stats['total_size']),
+            },
         }
diff --git a/irodsbackend/tests/test_api.py b/irodsbackend/tests/test_api.py
index aeb225be..5b4c5b49 100644
--- a/irodsbackend/tests/test_api.py
+++ b/irodsbackend/tests/test_api.py
@@ -241,6 +241,12 @@ def test_get_projects_path_with_root_path(self):
         expected = '/{}/{}/projects'.format(IRODS_ZONE, IRODS_ROOT_PATH)
         self.assertEqual(self.irods_backend.get_projects_path(), expected)
 
+    def test_get_trash_path(self):
+        """Test get_trash_path()"""
+        self.assertEqual(
+            self.irods_backend.get_trash_path(), '/{}/trash'.format(IRODS_ZONE)
+        )
+
     def test_get_uuid_from_path_assay(self):
         """Test get_uuid_from_path() with assay path"""
         path = self.irods_backend.get_path(self.assay)
diff --git a/irodsbackend/tests/test_plugins_taskflow.py b/irodsbackend/tests/test_plugins_taskflow.py
new file mode 100644
index 00000000..aed6b6bc
--- /dev/null
+++ b/irodsbackend/tests/test_plugins_taskflow.py
@@ -0,0 +1,70 @@
+"""Tests for plugins in the irodsbackend app with Taskflow enabled"""
+
+import os
+
+from django.conf import settings
+
+# Projectroles dependency
+from projectroles.models import SODAR_CONSTANTS
+from projectroles.plugins import BackendPluginPoint
+
+# Taskflowbackend dependency
+from taskflowbackend.tests.base import TaskflowbackendTestBase
+
+
+# SODAR constants
+PROJECT_TYPE_PROJECT = SODAR_CONSTANTS['PROJECT_TYPE_PROJECT']
+
+# Local constants
+TEST_COLL = 'test'
+TEST_FILE = 'test.txt'
+
+
+class TestGetStatistics(TaskflowbackendTestBase):
+    """Tests for get_statistics()"""
+
+    def setUp(self):
+        super().setUp()
+        self.plugin = BackendPluginPoint.get_plugin('omics_irods')
+        # Make project with owner in Taskflow
+        self.project, self.owner_as = self.make_project_taskflow(
+            title='TestProject',
+            type=PROJECT_TYPE_PROJECT,
+            parent=self.category,
+            owner=self.user,
+            description='description',
+            public_guest_access=False,
+        )
+        # Set up test collection
+        self.test_path = os.path.join(
+            self.irods_backend.get_path(self.project), TEST_COLL
+        )
+        self.test_coll = self.irods.collections.create(self.test_path)
+        # Set up rods user trash collection if not there
+        self.trash_path = os.path.join(
+            self.irods_backend.get_trash_path(), 'home', settings.IRODS_USER
+        )
+        if not self.irods.collections.exists(self.trash_path):
+            self.irods.collections.create(self.trash_path)
+        self.trash_coll = self.irods.collections.get(self.trash_path)
+
+    def test_no_files(self):
+        """Test get_statistics() with no files"""
+        stats = self.plugin.get_statistics()
+        # NOTE: filesizeformat() returns non-breaking spaces
+        self.assertEqual(stats['irods_data_size']['value'], '0\xa0bytes')
+        self.assertEqual(stats['irods_trash_size']['value'], '0\xa0bytes')
+
+    def test_project_file(self):
+        """Test get_statistics() with file under project collection"""
+        self.make_irods_object(self.test_coll, TEST_FILE)
+        stats = self.plugin.get_statistics()
+        self.assertEqual(stats['irods_data_size']['value'], '1.0\xa0KB')
+        self.assertEqual(stats['irods_trash_size']['value'], '0\xa0bytes')
+
+    def test_trash_file(self):
+        """Test get_statistics() with file under trash collection"""
+        self.make_irods_object(self.trash_coll, TEST_FILE)
+        stats = self.plugin.get_statistics()
+        self.assertEqual(stats['irods_data_size']['value'], '0\xa0bytes')
+        self.assertEqual(stats['irods_trash_size']['value'], '1.0\xa0KB')
diff --git a/taskflowbackend/api.py b/taskflowbackend/api.py
index cede2002..7855afd4 100644
--- a/taskflowbackend/api.py
+++ b/taskflowbackend/api.py
@@ -2,6 +2,7 @@
 
 import json
 import logging
+import os
 
 from irods.models import TicketQuery, UserGroup
 
@@ -267,8 +268,7 @@ def cleanup(cls):
         permanent_users = getattr(
             settings, 'TASKFLOW_TEST_PERMANENT_USERS', DEFAULT_PERMANENT_USERS
         )
-        # TODO: Remove stuff from user folders
-        # TODO: Remove stuff from trash
+        # TODO: Remove stuff from user home collections
 
         with irods_backend.get_session() as irods:
             # Remove project folders
@@ -294,6 +294,28 @@ def cleanup(cls):
                 irods_backend.delete_ticket(irods, ticket_str)
                 logger.debug('Deleted ticket: {}'.format(ticket_str))
 
+            # Remove data objects and unneeded collections from trash
+            trash_path = irods_backend.get_trash_path()
+            trash_coll = irods.collections.get(trash_path)
+            # NOTE: We can't delete the home trash collection
+            trash_home_path = os.path.join(trash_path, 'home')
+            for coll in irods_backend.get_colls_recursively(trash_coll):
+                if irods.collections.exists(
+                    coll.path
+                ) and not coll.path.startswith(trash_home_path):
+                    irods.collections.remove(
+                        coll.path, recurse=True, force=True
+                    )
+            obj_paths = [
+                o['path']
+                for o in irods_backend.get_objs_recursively(irods, trash_coll)
+                + irods_backend.get_objs_recursively(
+                    irods, trash_coll, md5=True
+                )
+            ]
+            for path in obj_paths:
+                irods.data_objects.unlink(path, force=True)
+
     @classmethod
     def get_error_msg(cls, flow_name, submit_info):
         """