Skip to content

Commit

Permalink
Made behaviour of keep_at_least parameter predictable (#68)
Browse files Browse the repository at this point in the history
* Migrate to pydantic v2

* Rewrited keep_at_least logic

* Updated pydantic in deps

* Updated poerty lock file

* Satisfied liuntian

* Review fix

* Fixed lint
  • Loading branch information
yma-het authored Nov 13, 2023
1 parent 7778ed0 commit 3d27e6a
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 63 deletions.
38 changes: 22 additions & 16 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import asyncio
import os
import re
from asyncio import Semaphore
from asyncio import Semaphore, Task
from datetime import datetime, timedelta
from enum import Enum
from fnmatch import fnmatch
Expand All @@ -13,7 +13,7 @@

from dateparser import parse
from httpx import AsyncClient, TimeoutException
from pydantic import BaseModel, conint, validator
from pydantic import BaseModel, ValidationInfo, conint, field_validator

if TYPE_CHECKING:
from httpx import Response
Expand Down Expand Up @@ -327,19 +327,19 @@ class Inputs(BaseModel):
cut_off: datetime
timestamp_to_use: TimestampType
account_type: AccountType
org_name: str | None
org_name: str | None = None
untagged_only: bool
skip_tags: list[str]
keep_at_least: conint(ge=0) = 0 # type: ignore[valid-type]
filter_tags: list[str]
filter_include_untagged: bool = True
dry_run: bool = False

@validator('skip_tags', 'filter_tags', 'image_names', pre=True)
@field_validator('skip_tags', 'filter_tags', 'image_names', mode='before')
def parse_comma_separate_string_as_list(cls, v: str) -> list[str]:
return [i.strip() for i in v.split(',')] if v else []

@validator('cut_off', pre=True)
@field_validator('cut_off', mode='before')
def parse_human_readable_datetime(cls, v: str) -> datetime:
parsed_cutoff = parse(v)
if not parsed_cutoff:
Expand All @@ -348,9 +348,9 @@ def parse_human_readable_datetime(cls, v: str) -> datetime:
raise ValueError('Timezone is required for the cut-off')
return parsed_cutoff

@validator('org_name', pre=True)
def validate_org_name(cls, v: str, values: dict) -> str | None:
if values['account_type'] == AccountType.ORG and not v:
@field_validator('org_name', mode='before')
def validate_org_name(cls, v: str, values: ValidationInfo) -> str | None:
if 'account_type' in values.data and values.data['account_type'] == AccountType.ORG and not v:
raise ValueError('org-name is required when account-type is org')
if v:
return v
Expand All @@ -371,13 +371,14 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
)

# Define list of deletion-tasks to append to
tasks = []
tasks: list[Task] = []
simulated_tasks = 0

# Iterate through dicts of image versions
sem = Semaphore(50)

async with sem:
for version in versions:
for idx, version in enumerate(versions):
# Parse either the update-at timestamp, or the created-at timestamp
# depending on which on the user has specified that we should use
updated_or_created_at = getattr(version, inputs.timestamp_to_use.value)
Expand Down Expand Up @@ -410,6 +411,9 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
# Skipping, because the filter_include_untagged setting is False
continue

# If we got here, most probably we will delete image.
# For pseudo-branching we set delete_image to true and
# handle cases with delete image by tag filtering in separate pseudo-branch
delete_image = not inputs.filter_tags
for filter_tag in inputs.filter_tags:
# One thing to note here is that we use fnmatch to support wildcards.
Expand All @@ -419,13 +423,21 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
delete_image = True
break

if inputs.keep_at_least > 0:
if idx + 1 - (len(tasks) + simulated_tasks) > inputs.keep_at_least:
delete_image = True
else:
delete_image = False

# Here we will handle exclusion case
for skip_tag in inputs.skip_tags:
if any(fnmatch(tag, skip_tag) for tag in image_tags):
# Skipping because this image version is tagged with a protected tag
delete_image = False

if delete_image is True and inputs.dry_run:
delete_image = False
simulated_tasks += 1
print(f'Would delete image {image_name}:{version.id}.')

if delete_image:
Expand All @@ -442,12 +454,6 @@ async def get_and_delete_old_versions(image_name: str, inputs: Inputs, http_clie
)
)

# Trim the version list to the n'th element we want to keep
if inputs.keep_at_least > 0:
for _i in range(0, min(inputs.keep_at_least, len(tasks))):
tasks[0].cancel()
tasks.remove(tasks[0])

if not tasks:
print(f'No more versions to delete for {image_name}')

Expand Down
24 changes: 20 additions & 4 deletions main_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,13 @@ class TestGetAndDeleteOldVersions:
)
]

@staticmethod
def generate_fresh_valid_data_with_id(id):
r = deepcopy(TestGetAndDeleteOldVersions.valid_data[0])
r.id = id
r.created_at = datetime.now(timezone(timedelta()))
return r

async def test_delete_package(self, mocker, capsys, http_client):
# Mock the list function
mocker.patch.object(main.GithubAPI, 'list_package_versions', return_value=self.valid_data)
Expand All @@ -264,6 +271,15 @@ async def test_keep_at_least(self, mocker, capsys, http_client):
captured = capsys.readouterr()
assert captured.out == 'No more versions to delete for a\n'

async def test_keep_at_least_deletes_not_only_marked(self, mocker, capsys, http_client):
data = [self.generate_fresh_valid_data_with_id(id) for id in range(3)]
data.append(self.valid_data[0])
mocker.patch.object(main.GithubAPI, 'list_package_versions', return_value=data)
inputs = _create_inputs_model(keep_at_least=2)
await get_and_delete_old_versions(image_name='a', inputs=inputs, http_client=http_client)
captured = capsys.readouterr()
assert captured.out == 'Deleted old image: a:1234567\n'

async def test_not_beyond_cutoff(self, mocker, capsys, http_client):
response_data = [
PackageVersionResponse(
Expand Down Expand Up @@ -362,8 +378,8 @@ async def test_dry_run(self, mocker, capsys, http_client):
def test_inputs_bad_account_type():
# Account type
_create_inputs_model(account_type='personal')
_create_inputs_model(account_type='org')
with pytest.raises(ValidationError, match='is not a valid enumeration member'):
_create_inputs_model(account_type='org', org_name='myorg')
with pytest.raises(ValidationError, match='Input should be \'org\' or \'personal\''):
_create_inputs_model(account_type='')

# Org name
Expand All @@ -374,7 +390,7 @@ def test_inputs_bad_account_type():
# Timestamp type
_create_inputs_model(timestamp_to_use='updated_at')
_create_inputs_model(timestamp_to_use='created_at')
with pytest.raises(ValueError, match=' value is not a valid enumeration mem'):
with pytest.raises(ValueError, match='Input should be \'updated_at\' or \'created_at\''):
_create_inputs_model(timestamp_to_use='wat')

# Cut-off
Expand All @@ -398,7 +414,7 @@ def test_inputs_bad_account_type():
assert _create_inputs_model(skip_tags='a , b ,c').skip_tags == ['a', 'b', 'c']

# Keep at least
with pytest.raises(ValueError, match='ensure this value is greater than or equal to 0'):
with pytest.raises(ValueError, match='Input should be greater than or equal to 0'):
_create_inputs_model(keep_at_least='-1')

# Filter tags
Expand Down
Loading

0 comments on commit 3d27e6a

Please sign in to comment.