Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cylc clean 3: targeted clean #4237

Merged
merged 22 commits into from
Jun 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
988648f
Type annotations
MetRonnie Mar 30, 2021
506bb38
cylc clean: add --remote-only option
MetRonnie Mar 31, 2021
eacdcce
Fix & refactor cylc clean unit tests
MetRonnie Mar 31, 2021
a037073
cylc clean: Implement targeted clean
MetRonnie Apr 7, 2021
505b27b
Fix error when share/cycle sylink dir same as share
MetRonnie Apr 13, 2021
262126e
Allow symlink dirs in global config to be unset
MetRonnie Apr 13, 2021
b02d74f
Fix #4133 - better error handling when rmtree fails
MetRonnie Apr 22, 2021
7e0ee50
Tidy
MetRonnie Apr 29, 2021
fa112a2
cylc clean: Fix deletion of symlink dir targets in targeted clean
MetRonnie Apr 26, 2021
4581158
cylc clean: overhaul targeted clean to make globs work
MetRonnie Jun 1, 2021
2268f20
Refactor cylc clean test into fixture
MetRonnie Jun 1, 2021
d7a9f82
cylc clean: Fix tidying of empty run dir parents
MetRonnie Jun 2, 2021
634109b
cylc clean: add help examples
MetRonnie Jun 2, 2021
a2fcaea
Remote clean: iterate over proc queue more elegantly
MetRonnie Jun 4, 2021
6eb3500
Update changelog
MetRonnie Jun 4, 2021
82ec437
Address code review
MetRonnie Jun 9, 2021
2300f4d
Targeted clean: Overhaul processing of glob results
MetRonnie Jun 9, 2021
011e6e0
Move glob_run_dir() to workflow_files
MetRonnie Jun 11, 2021
3a44181
Merge branch 'master' into cylc-clean
MetRonnie Jun 21, 2021
2dc0f0d
Fix small mistake
MetRonnie Jun 21, 2021
4f23670
Fix test failures
MetRonnie Jun 21, 2021
fadda2a
Remote clean: only re-run on different platform if SSH failed (255)
MetRonnie Jun 23, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ version control information on installation of a workflow.
[#4222](https://github.com/cylc/cylc-flow/pull/4222) - Fix bug where a
workflow's public database file was not closed properly.

[#4237](https://github.com/cylc/cylc-flow/pull/4237) - `cylc clean` can now
remove specific sub-directories instead of the whole run directory, using the
`--rm` option. There are also the options `--local-only` and `--remote-only`
for choosing to only clean on the local filesystem or remote install targets
respectively.

### Fixes

[#4248](https://github.com/cylc/cylc-flow/pull/4248)
Expand Down
29 changes: 27 additions & 2 deletions cylc/flow/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Exceptions for "expected" errors."""


from typing import Iterable
import errno
from typing import Callable, Iterable, NoReturn, Tuple, Type


class CylcError(Exception):
Expand Down Expand Up @@ -99,6 +99,31 @@ class WorkflowFilesError(CylcError):
"""Exception for errors related to workflow files/directories."""


def handle_rmtree_err(
function: Callable,
path: str,
excinfo: Tuple[Type[Exception], Exception, object]
) -> NoReturn:
"""Error handler for shutil.rmtree."""
exc = excinfo[1]
if isinstance(exc, OSError) and exc.errno == errno.ENOTEMPTY:
# "Directory not empty", likely due to filesystem lag
raise FileRemovalError(exc)
raise exc


class FileRemovalError(CylcError):
"""Exception for errors during deletion of files/directories, which are
probably the filesystem's fault, not Cylc's."""

def __init__(self, exc: Exception) -> None:
CylcError.__init__(
self,
f"{exc}. This is probably a temporary issue with the filesystem, "
"not a problem with Cylc."
)
Comment on lines +119 to +124
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(minor comment more for future sake)

A pattern that is a bit nicer for Python interfaces and testing is to store exc as an attribute on the FileRemovalError and handle the representation in the __str__ method.

Will get around to refactoring the other errors one day when we have less to do 😆.



class TaskRemoteMgmtError(CylcError):
"""Exceptions initialising workflow run directory of remote job host."""

Expand Down
153 changes: 113 additions & 40 deletions cylc/flow/pathutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,19 @@
from pathlib import Path
import re
from shutil import rmtree
from typing import Union
from typing import Dict, Iterable, Set, Union

from cylc.flow import LOG
from cylc.flow.cfgspec.glbl_cfg import glbl_cfg
from cylc.flow.exceptions import WorkflowFilesError
from cylc.flow.exceptions import (
UserInputError, WorkflowFilesError, handle_rmtree_err
)
from cylc.flow.platforms import get_localhost_install_target


# Note: do not import this elsewhere, as it might bypass unit test
# monkeypatching:
_CYLC_RUN_DIR = '$HOME/cylc-run'
_CYLC_RUN_DIR = os.path.join('$HOME', 'cylc-run')


def expand_path(*args: Union[Path, str]) -> str:
Expand Down Expand Up @@ -128,96 +130,107 @@ def make_workflow_run_tree(workflow):
LOG.debug(f'{dir_}: directory created')


def make_localhost_symlinks(rund, named_sub_dir):
def make_localhost_symlinks(
rund: Union[Path, str], named_sub_dir: str
) -> Dict[str, Union[Path, str]]:
"""Creates symlinks for any configured symlink dirs from glbl_cfg.
Args:
rund: the entire run directory path
named_sub_dir: e.g flow_name/run1

Returns:
dict - A dictionary of Symlinks with sources as keys and
destinations as values: ``{source: destination}``
Dictionary of symlinks with sources as keys and
destinations as values: ``{source: destination}``

"""
dirs_to_symlink = get_dirs_to_symlink(
get_localhost_install_target(), named_sub_dir)
symlinks_created = {}
for key, value in dirs_to_symlink.items():
if key == 'run':
dst = rund
symlink_path = rund
else:
dst = os.path.join(rund, key)
src = expand_path(value)
if '$' in src:
symlink_path = os.path.join(rund, key)
target = expand_path(value)
if '$' in target:
raise WorkflowFilesError(
f'Unable to create symlink to {src}.'
f'Unable to create symlink to {target}.'
f' \'{value}\' contains an invalid environment variable.'
' Please check configuration.')
symlink_success = make_symlink(src, dst)
# symlink info returned for logging purposes, symlinks created
# before logs as this dir may be a symlink.
symlink_success = make_symlink(symlink_path, target)
# Symlink info returned for logging purposes. Symlinks should be
# created before logs as the log dir may be a symlink.
if symlink_success:
symlinks_created[src] = dst
symlinks_created[target] = symlink_path
return symlinks_created


def get_dirs_to_symlink(install_target, flow_name):
def get_dirs_to_symlink(install_target: str, flow_name: str) -> Dict[str, str]:
"""Returns dictionary of directories to symlink from glbcfg.
Note the paths should remain unexpanded, to be expanded on the remote.

Note the paths should remain unexpanded, to be expanded on the remote.
"""
dirs_to_symlink = {}
dirs_to_symlink: Dict[str, str] = {}
symlink_conf = glbl_cfg().get(['symlink dirs'])

if install_target not in symlink_conf.keys():
return dirs_to_symlink
base_dir = symlink_conf[install_target]['run']
if base_dir is not None:
if base_dir:
dirs_to_symlink['run'] = os.path.join(base_dir, 'cylc-run', flow_name)
for dir_ in ['log', 'share', 'share/cycle', 'work']:
link = symlink_conf[install_target][dir_]
if link is None or link == base_dir:
if (not link) or link == base_dir:
continue
dirs_to_symlink[dir_] = os.path.join(link, 'cylc-run', flow_name, dir_)
return dirs_to_symlink


def make_symlink(src, dst):
def make_symlink(path: Union[Path, str], target: Union[Path, str]) -> bool:
"""Makes symlinks for directories.

Args:
src (str): target path, where the files are to be stored.
dst (str): full path of link that will point to src.
path: Absolute path of the desired symlink.
target: Absolute path of the symlink's target directory.
"""
if os.path.exists(dst):
if os.path.islink(dst) and os.path.samefile(dst, src):
path = Path(path)
target = Path(target)
if path.exists():
if path.is_symlink() and path.samefile(target):
# correct symlink already exists
return False
# symlink name is in use by a physical file or directory
# log and return
LOG.debug(
f"Unable to create {src} symlink. The path {dst} already exists.")
f"Unable to create symlink to {target}. "
f"The path {path} already exists.")
return False
elif os.path.islink(dst):
elif path.is_symlink():
# remove a bad symlink.
try:
os.unlink(dst)
except Exception:
path.unlink()
except OSError:
raise WorkflowFilesError(
f"Error when symlinking. Failed to unlink bad symlink {dst}.")
os.makedirs(src, exist_ok=True)
os.makedirs(os.path.dirname(dst), exist_ok=True)
f"Error when symlinking. Failed to unlink bad symlink {path}.")
target.mkdir(parents=True, exist_ok=True)
if path.exists():
# Trying to link to itself; no symlink needed
# (e.g. path's parent is symlink to target's parent)
return False
path.parent.mkdir(parents=True, exist_ok=True)
try:
os.symlink(src, dst, target_is_directory=True)
path.symlink_to(target)
return True
except Exception as exc:
except OSError as exc:
raise WorkflowFilesError(f"Error when symlinking\n{exc}")


def remove_dir(path):
"""Delete a directory including contents, including the target directory
if the specified path is a symlink.
def remove_dir_and_target(path: Union[Path, str]) -> None:
"""Delete a directory tree (i.e. including contents), as well as the
target directory tree if the specified path is a symlink.

Args:
path (str): the absolute path of the directory to delete.
path: the absolute path of the directory to delete.
"""
if not os.path.isabs(path):
raise ValueError('Path must be absolute')
Expand All @@ -228,7 +241,7 @@ def remove_dir(path):
target = os.path.realpath(path)
LOG.debug(
f'Removing symlink target directory: ({path} ->) {target}')
rmtree(target)
rmtree(target, onerror=handle_rmtree_err)
LOG.debug(f'Removing symlink: {path}')
else:
LOG.debug(f'Removing broken symlink: {path}')
Expand All @@ -237,7 +250,27 @@ def remove_dir(path):
raise FileNotFoundError(path)
else:
LOG.debug(f'Removing directory: {path}')
rmtree(path)
rmtree(path, onerror=handle_rmtree_err)


def remove_dir_or_file(path: Union[Path, str]) -> None:
"""Delete a directory tree, or a file, or a symlink.
Does not follow symlinks.

Args:
path: the absolute path of the directory/file/symlink to delete.
"""
if not os.path.isabs(path):
raise ValueError("Path must be absolute")
if os.path.islink(path):
LOG.debug(f"Removing symlink: {path}")
os.remove(path)
elif os.path.isfile(path):
LOG.debug(f"Removing file: {path}")
os.remove(path)
else:
LOG.debug(f"Removing directory: {path}")
rmtree(path, onerror=handle_rmtree_err)


def get_next_rundir_number(run_path):
Expand All @@ -249,3 +282,43 @@ def get_next_rundir_number(run_path):
return int(last_run_num) + 1
except OSError:
return 1


def parse_rm_dirs(rm_dirs: Iterable[str]) -> Set[str]:
"""Parse a list of possibly colon-separated dirs (or files or globs).
Return the set of all the dirs.

Used by cylc clean with the --rm option.
"""
result: Set[str] = set()
for item in rm_dirs:
for part in item.split(':'):
part = part.strip()
if not part:
continue
is_dir = part.endswith(os.sep)
part = os.path.normpath(part)
if os.path.isabs(part):
raise UserInputError("--rm option cannot take absolute paths")
if part == '.' or part.startswith(f'..{os.sep}'):
raise UserInputError(
"--rm option cannot take paths that point to the "
"run directory or above"
)
if is_dir:
# Preserve trailing slash to ensure it only matches dirs,
# not files, when globbing
part += os.sep
result.add(part)
return result


def is_relative_to(path1: Union[Path, str], path2: Union[Path, str]) -> bool:
"""Return whether or not path1 is relative to path2."""
# In future, we can just use pathlib.Path.is_relative_to()
# when Python 3.9 becomes the minimum supported version
try:
Path(os.path.normpath(path1)).relative_to(os.path.normpath(path2))
except ValueError:
return False
return True
Loading