-
Notifications
You must be signed in to change notification settings - Fork 608
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add `--sort` arg to `delete-cache` to sort by size
#2815
base: main
Are you sure you want to change the base?
Changes from all commits
d43898e
2dacb5e
51e9453
99da73f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -18,6 +18,7 @@ | |||||
huggingface-cli delete-cache | ||||||
huggingface-cli delete-cache --disable-tui | ||||||
huggingface-cli delete-cache --dir ~/.cache/huggingface/hub | ||||||
huggingface-cli delete-cache --sort | ||||||
|
||||||
NOTE: | ||||||
This command is based on `InquirerPy` to build the multiselect menu in the terminal. | ||||||
|
@@ -50,7 +51,6 @@ | |||||
TODO: add support for `huggingface-cli delete-cache aaaaaa bbbbbb cccccc (...)` ? | ||||||
TODO: add "--keep-last" arg to delete revisions that are not on `main` ref | ||||||
TODO: add "--filter" arg to filter repositories by name ? | ||||||
TODO: add "--sort" arg to sort by size ? | ||||||
TODO: add "--limit" arg to limit to X repos ? | ||||||
TODO: add "-y" arg for immediate deletion ? | ||||||
See discussions in https://github.com/huggingface/huggingface_hub/issues/1025. | ||||||
|
@@ -120,11 +120,26 @@ def register_subcommand(parser: _SubParsersAction): | |||||
), | ||||||
) | ||||||
|
||||||
delete_cache_parser.add_argument( | ||||||
"--sort", | ||||||
nargs="?", | ||||||
choices=["size", "alphabetical", "lastUpdated", "lastUsed"], | ||||||
const="size", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
By default, let's not sort |
||||||
help=( | ||||||
"Sort repositories by the specified criteria. Options: " | ||||||
"'size' (largest first), " | ||||||
"'alphabetical' (A-Z), " | ||||||
"'lastUpdated' (newest first), " | ||||||
"'lastUsed' (most recent first)" | ||||||
), | ||||||
) | ||||||
|
||||||
delete_cache_parser.set_defaults(func=DeleteCacheCommand) | ||||||
|
||||||
def __init__(self, args: Namespace) -> None: | ||||||
self.cache_dir: Optional[str] = args.dir | ||||||
self.disable_tui: bool = args.disable_tui | ||||||
self.sort_by: Optional[str] = args.sort | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you define a `SortingOption_T = Literal["size", "alphabetical", "lastUpdated", "lastUsed"]` and then use it throughout the module like this?
Suggested change
|
||||||
|
||||||
def run(self): | ||||||
"""Run `delete-cache` command with or without TUI.""" | ||||||
|
@@ -133,9 +148,9 @@ def run(self): | |||||
|
||||||
# Manual review from the user | ||||||
if self.disable_tui: | ||||||
selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[]) | ||||||
selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=self.sort_by) | ||||||
else: | ||||||
selected_hashes = _manual_review_tui(hf_cache_info, preselected=[]) | ||||||
selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=self.sort_by) | ||||||
|
||||||
# If deletion is not cancelled | ||||||
if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes: | ||||||
|
@@ -164,13 +179,21 @@ def run(self): | |||||
|
||||||
|
||||||
@require_inquirer_py | ||||||
def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]: | ||||||
def _manual_review_tui( | ||||||
hf_cache_info: HFCacheInfo, | ||||||
preselected: List[str], | ||||||
sort_by: Optional[str] = None, | ||||||
) -> List[str]: | ||||||
"""Ask the user for a manual review of the revisions to delete. | ||||||
|
||||||
Displays a multi-select menu in the terminal (TUI). | ||||||
""" | ||||||
# Define multiselect list | ||||||
choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected) | ||||||
choices = _get_tui_choices_from_scan( | ||||||
repos=hf_cache_info.repos, | ||||||
preselected=preselected, | ||||||
sort_by=sort_by, | ||||||
) | ||||||
checkbox = inquirer.checkbox( | ||||||
message="Select revisions to delete:", | ||||||
choices=choices, # List of revisions with some pre-selection | ||||||
|
@@ -213,22 +236,27 @@ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool: | |||||
return inquirer.confirm(message, default=default).execute() | ||||||
|
||||||
|
||||||
def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: List[str]) -> List: | ||||||
def _get_tui_choices_from_scan( | ||||||
repos: Iterable[CachedRepoInfo], | ||||||
preselected: List[str], | ||||||
sort_by: Optional[str] = None, | ||||||
) -> List: | ||||||
"""Build a list of choices from the scanned repos. | ||||||
|
||||||
Args: | ||||||
repos (*Iterable[`CachedRepoInfo`]*): | ||||||
List of scanned repos on which we want to delete revisions. | ||||||
preselected (*List[`str`]*): | ||||||
List of revision hashes that will be preselected. | ||||||
sort_by (*Optional[str]*): | ||||||
Sorting direction. Choices: "size", "alphabetical", "lastUpdated", "lastUsed". | ||||||
|
||||||
Return: | ||||||
The list of choices to pass to `inquirer.checkbox`. | ||||||
""" | ||||||
choices: List[Union[Choice, Separator]] = [] | ||||||
|
||||||
# First choice is to cancel the deletion. If selected, nothing will be deleted, | ||||||
# no matter the other selected items. | ||||||
# First choice is to cancel the deletion | ||||||
choices.append( | ||||||
Choice( | ||||||
_CANCEL_DELETION_STR, | ||||||
|
@@ -237,8 +265,19 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis | |||||
) | ||||||
) | ||||||
|
||||||
# Display a separator per repo and a Choice for each revisions of the repo | ||||||
for repo in sorted(repos, key=_repo_sorting_order): | ||||||
# Sort repos based on specified criteria | ||||||
sorted_repos = sorted( | ||||||
repos, | ||||||
key=lambda repo: { | ||||||
"size": lambda r: -r.size_on_disk, # largest first | ||||||
"alphabetical": lambda r: (r.repo_type, r.repo_id.lower()), # by type then name | ||||||
"lastUpdated": lambda r: -max(rev.last_modified for rev in r.revisions), # newest first | ||||||
"lastUsed": lambda r: -r.last_accessed, # most recently used first | ||||||
None: lambda r: (r.repo_type, r.repo_id), # default stable order | ||||||
}[sort_by](repo), | ||||||
) | ||||||
|
||||||
for repo in sorted_repos: | ||||||
# Repo as separator | ||||||
choices.append( | ||||||
Separator( | ||||||
|
@@ -264,7 +303,11 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis | |||||
return choices | ||||||
|
||||||
|
||||||
def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]: | ||||||
def _manual_review_no_tui( | ||||||
hf_cache_info: HFCacheInfo, | ||||||
preselected: List[str], | ||||||
sort_by: Optional[str] = None, | ||||||
) -> List[str]: | ||||||
"""Ask the user for a manual review of the revisions to delete. | ||||||
|
||||||
Used when TUI is disabled. Manual review happens in a separate tmp file that the | ||||||
|
@@ -275,7 +318,11 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> | |||||
os.close(fd) | ||||||
|
||||||
lines = [] | ||||||
for repo in sorted(hf_cache_info.repos, key=_repo_sorting_order): | ||||||
sorted_repos = sorted( | ||||||
hf_cache_info.repos, | ||||||
key=lambda repo: -repo.size_on_disk if sort_by == "size" else 1, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you move the sorting logic defined above in the TUI part to a separate method so that it can be reused here, so that the "no TUI" path can also support all sorting options? |
||||||
) | ||||||
for repo in sorted_repos: | ||||||
lines.append( | ||||||
f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str}," | ||||||
f" used {repo.last_accessed_str})" | ||||||
|
@@ -314,9 +361,9 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> | |||||
): | ||||||
break | ||||||
|
||||||
# 4. Return selected_hashes | ||||||
# 4. Return selected_hashes sorted to maintain stable order | ||||||
os.remove(tmp_path) | ||||||
return selected_hashes | ||||||
return sorted(selected_hashes) # Sort to maintain stable order | ||||||
|
||||||
|
||||||
def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool: | ||||||
|
@@ -418,11 +465,6 @@ def _read_manual_review_tmp_file(tmp_path: str) -> List[str]: | |||||
""".strip() | ||||||
|
||||||
|
||||||
def _repo_sorting_order(repo: CachedRepoInfo) -> Any: | ||||||
# First split by Dataset/Model, then sort by last accessed (oldest first) | ||||||
return (repo.repo_type, repo.last_accessed) | ||||||
|
||||||
|
||||||
def _revision_sorting_order(revision: CachedRevisionInfo) -> Any: | ||||||
# Sort by last modified (oldest first) | ||||||
return revision.last_modified |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(to showcase how to use it)