Made behaviour of keep_at_least parameter predictable (#68)

* Migrate to pydantic v2 * Rewrited keep_at_least logic * Updated pydantic in deps * Updated poerty lock file * Satisfied liuntian * Review fix * Fixed lint
snok · Nov 13, 2023 · 3d27e6a · 3d27e6a
1 parent 7778ed0
commit 3d27e6a
Show file tree

Hide file tree

Showing 4 changed files with 193 additions and 63 deletions.
diff --git a/main.py b/main.py
@@ -3,7 +3,7 @@
 import asyncio
 import os
 import re
-from asyncio import Semaphore
+from asyncio import Semaphore, Task
 from datetime import datetime, timedelta
 from enum import Enum
 from fnmatch import fnmatch
@@ -13,7 +13,7 @@
 
 from dateparser import parse
 from httpx import AsyncClient, TimeoutException
-from pydantic import BaseModel, conint, validator
+from pydantic import BaseModel, ValidationInfo, conint, field_validator
 
 if TYPE_CHECKING:
     from httpx import Response
@@ -327,19 +327,19 @@ class Inputs(BaseModel):
     cut_off: datetime
     timestamp_to_use: TimestampType
     account_type: AccountType
-    org_name: str | None
+    org_name: str | None = None
     untagged_only: bool
     skip_tags: list[str]
     keep_at_least: conint(ge=0) = 0  # type: ignore[valid-type]
     filter_tags: list[str]
     filter_include_untagged: bool = True
     dry_run: bool = False
 
-    @validator('skip_tags', 'filter_tags', 'image_names', pre=True)
+    @field_validator('skip_tags', 'filter_tags', 'image_names', mode='before')
     def parse_comma_separate_string_as_list(cls, v: str) -> list[str]:
         return [i.strip() for i in v.split(',')] if v else []
 
-    @validator('cut_off', pre=True)
+    @field_validator('cut_off', mode='before')
     def parse_human_readable_datetime(cls, v: str) -> datetime:
         parsed_cutoff = parse(v)
         if not parsed_cutoff:
@@ -348,9 +348,9 @@ def parse_human_readable_datetime(cls, v: str) -> datetime:
             raise ValueError('Timezone is required for the cut-off')
         return parsed_cutoff
 
-    @validator('org_name', pre=True)
-    def validate_org_name(cls, v: str, values: dict) -> str | None:
-        if values['account_type'] == AccountType.ORG and not v:
+    @field_validator('org_name', mode='before')
+    def validate_org_name(cls, v: str, values: ValidationInfo) -> str | None:
+        if 'account_type' in values.data and values.data['account_type'] == AccountType.ORG and not v:
             raise ValueError('org-name is required when account-type is org')
         if v:
             return v
@@ -371,13 +371,14 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
     )
 
     # Define list of deletion-tasks to append to
-    tasks = []
+    tasks: list[Task] = []
+    simulated_tasks = 0
 
     # Iterate through dicts of image versions
     sem = Semaphore(50)
 
     async with sem:
-        for version in versions:
+        for idx, version in enumerate(versions):
             # Parse either the update-at timestamp, or the created-at timestamp
             # depending on which on the user has specified that we should use
             updated_or_created_at = getattr(version, inputs.timestamp_to_use.value)
@@ -410,6 +411,9 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
                 # Skipping, because the filter_include_untagged setting is False
                 continue
 
+            # If we got here, most probably we will delete image.
+            # For pseudo-branching we set delete_image to true and
+            # handle cases with delete image by tag filtering in separate pseudo-branch
             delete_image = not inputs.filter_tags
             for filter_tag in inputs.filter_tags:
                 # One thing to note here is that we use fnmatch to support wildcards.
@@ -419,13 +423,21 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
                     delete_image = True
                     break
 
+            if inputs.keep_at_least > 0:
+                if idx + 1 - (len(tasks) + simulated_tasks) > inputs.keep_at_least:
+                    delete_image = True
+                else:
+                    delete_image = False
+
+            # Here we will handle exclusion case
             for skip_tag in inputs.skip_tags:
                 if any(fnmatch(tag, skip_tag) for tag in image_tags):
                     # Skipping because this image version is tagged with a protected tag
                     delete_image = False
 
             if delete_image is True and inputs.dry_run:
                 delete_image = False
+                simulated_tasks += 1
                 print(f'Would delete image {image_name}:{version.id}.')
 
             if delete_image:
@@ -442,12 +454,6 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
                     )
                 )
 
-    # Trim the version list to the n'th element we want to keep
-    if inputs.keep_at_least > 0:
-        for _i in range(0, min(inputs.keep_at_least, len(tasks))):
-            tasks[0].cancel()
-            tasks.remove(tasks[0])
-
     if not tasks:
         print(f'No more versions to delete for {image_name}')
 

diff --git a/main_tests.py b/main_tests.py
@@ -245,6 +245,13 @@ class TestGetAndDeleteOldVersions:
         )
     ]
 
+    @staticmethod
+    def generate_fresh_valid_data_with_id(id):
+        r = deepcopy(TestGetAndDeleteOldVersions.valid_data[0])
+        r.id = id
+        r.created_at = datetime.now(timezone(timedelta()))
+        return r
+
     async def test_delete_package(self, mocker, capsys, http_client):
         # Mock the list function
         mocker.patch.object(main.GithubAPI, 'list_package_versions', return_value=self.valid_data)
@@ -264,6 +271,15 @@ async def test_keep_at_least(self, mocker, capsys, http_client):
         captured = capsys.readouterr()
         assert captured.out == 'No more versions to delete for a\n'
 
+    async def test_keep_at_least_deletes_not_only_marked(self, mocker, capsys, http_client):
+        data = [self.generate_fresh_valid_data_with_id(id) for id in range(3)]
+        data.append(self.valid_data[0])
+        mocker.patch.object(main.GithubAPI, 'list_package_versions', return_value=data)
+        inputs = _create_inputs_model(keep_at_least=2)
+        await get_and_delete_old_versions(image_name='a', inputs=inputs, http_client=http_client)
+        captured = capsys.readouterr()
+        assert captured.out == 'Deleted old image: a:1234567\n'
+
     async def test_not_beyond_cutoff(self, mocker, capsys, http_client):
         response_data = [
             PackageVersionResponse(
@@ -362,8 +378,8 @@ async def test_dry_run(self, mocker, capsys, http_client):
 def test_inputs_bad_account_type():
     # Account type
     _create_inputs_model(account_type='personal')
-    _create_inputs_model(account_type='org')
-    with pytest.raises(ValidationError, match='is not a valid enumeration member'):
+    _create_inputs_model(account_type='org', org_name='myorg')
+    with pytest.raises(ValidationError, match='Input should be \'org\' or \'personal\''):
         _create_inputs_model(account_type='')
 
     # Org name
@@ -374,7 +390,7 @@ def test_inputs_bad_account_type():
     # Timestamp type
     _create_inputs_model(timestamp_to_use='updated_at')
     _create_inputs_model(timestamp_to_use='created_at')
-    with pytest.raises(ValueError, match=' value is not a valid enumeration mem'):
+    with pytest.raises(ValueError, match='Input should be \'updated_at\' or \'created_at\''):
         _create_inputs_model(timestamp_to_use='wat')
 
     # Cut-off
@@ -398,7 +414,7 @@ def test_inputs_bad_account_type():
     assert _create_inputs_model(skip_tags='a , b  ,c').skip_tags == ['a', 'b', 'c']
 
     # Keep at least
-    with pytest.raises(ValueError, match='ensure this value is greater than or equal to 0'):
+    with pytest.raises(ValueError, match='Input should be greater than or equal to 0'):
         _create_inputs_model(keep_at_least='-1')
 
     # Filter tags