Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SSH task collection folder removal upon failure #1649

Merged
merged 9 commits into from
Jul 17, 2024
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
**Note**: Numbers like (\#1234) point to closed Pull Requests on the fractal-server repository.

# 2.3.3

This release fixes a SSH-task-collection bug introduced in version 2.3.1.

* SSH features:
* Fix wrong removal of task-package folder upon task-collection failure (\#1649).

# 2.3.2

> **WARNING**: The remove-remote-venv-folder in the SSH task collection is broken (see issue 1633). Do not deploy this version in an SSH-based `fractal-server` instance.
Expand Down
36 changes: 35 additions & 1 deletion fractal_server/ssh/_fabric.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,6 @@ def remove_folder(
folder: Absolute path to a folder that should be removed.
safe_root: If `folder` is not a subfolder of the absolute
`safe_root` path, raise an error.
fractal_ssh:
"""
invalid_characters = {" ", "\n", ";", "$", "`"}

Expand All @@ -307,6 +306,41 @@ def remove_folder(
cmd = f"rm -r {folder}"
self.run_command(cmd=cmd)

def rename_folder(
self,
*,
source: str,
target: str,
) -> None:
"""
Rename a folder remotely via SSH.

This functions calls `mv`, after a few checks on `source` and
`target`. Note that `source` and `target` must be subfolders
of the same parent folder.

Args:
source:
target:
"""
invalid_characters = {" ", "\n", ";", "$", "`"}

if (
not isinstance(source, str)
or not isinstance(target, str)
or len(invalid_characters.intersection(source)) > 0
or len(invalid_characters.intersection(target)) > 0
or not Path(source).is_absolute()
or not Path(target).is_absolute()
or Path(source).parent != Path(target).parent
):
raise ValueError(
f"Invalid `rename_folder` arguments {source=} and {target=}."
)
else:
cmd = f"mv {source} {target}"
self.run_command(cmd=cmd)


def get_ssh_connection(
*,
Expand Down
45 changes: 32 additions & 13 deletions fractal_server/tasks/v2/background_operations_ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def background_collect_pip_ssh(
/ ".fractal"
/ f"{task_pkg.package_name}{package_version}"
).as_posix()

logger.debug(f"{package_env_dir=}")
replacements = [
("__PACKAGE_NAME__", task_pkg.package_name),
("__PACKAGE_ENV_DIR__", package_env_dir),
Expand Down Expand Up @@ -197,10 +197,18 @@ def background_collect_pip_ssh(
# long operations that do not use the db
db.close()

# `remove_venv_folder_upon_failure` is set to True only if
# script 1 goes through, which means that the remote folder
# `package_env_dir` did not already exist. If this remote
# folder already existed, then script 1 fails and the boolean
# flag `remove_venv_folder_upon_failure` remains false.
remove_venv_folder_upon_failure = False
stdout = _customize_and_run_template(
script_filename="_1_create_venv.sh",
**common_args,
)
remove_venv_folder_upon_failure = True

stdout = _customize_and_run_template(
script_filename="_2_upgrade_pip.sh",
**common_args,
Expand Down Expand Up @@ -294,6 +302,7 @@ def background_collect_pip_ssh(

# Finalize (write metadata to DB)
logger.debug("finalising - START")

collection_state = db.get(CollectionStateV2, state_id)
collection_state.data["log"] = log_file_path.open("r").read()
collection_state.data["freeze"] = stdout_pip_freeze
Expand All @@ -312,16 +321,26 @@ def background_collect_pip_ssh(
exception=e,
db=db,
)
try:
logger.info(f"Now delete remote folder {package_env_dir}")
fractal_ssh.remove_folder(
folder=package_env_dir,
safe_root=settings.FRACTAL_SLURM_SSH_WORKING_BASE_DIR,
)
logger.info(f"Deleted remoted folder {package_env_dir}")
except Exception as e:
logger.error(
f"Deleting remote folder failed.\n"
f"Original error:\n{str(e)}"
)
if remove_venv_folder_upon_failure:
try:
logger.info(
f"Now delete remote folder {package_env_dir}"
)
fractal_ssh.remove_folder(
folder=package_env_dir,
safe_root=settings.FRACTAL_SLURM_SSH_WORKING_BASE_DIR, # noqa: E501
)
logger.info(
f"Deleted remoted folder {package_env_dir}"
)
except Exception as e:
logger.error(
f"Removing remote folder failed.\n"
f"Original error:\n{str(e)}"
)
else:
logger.info(
"Not trying to remove remote folder "
f"{package_env_dir}."
)
return
4 changes: 2 additions & 2 deletions fractal_server/tasks/v2/templates/_1_create_venv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ PYTHON=__PYTHON__

TIME_START=$(date +%s)


# Create main folder
# Check that package folder does not exist
if [ -d "$PACKAGE_ENV_DIR" ]; then
write_log "ERROR: Folder $PACKAGE_ENV_DIR already exists. Exit."
exit 1
fi

write_log "START mkdir -p $PACKAGE_ENV_DIR"
mkdir -p $PACKAGE_ENV_DIR
write_log "END mkdir -p $PACKAGE_ENV_DIR"
Expand Down
4 changes: 0 additions & 4 deletions fractal_server/tasks/v2/templates/_2_upgrade_pip.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@ write_log(){

# Variables to be filled within fractal-server
PACKAGE_ENV_DIR=__PACKAGE_ENV_DIR__
PACKAGE_NAME=__PACKAGE_NAME__
PACKAGE=__PACKAGE__
PYTHON=__PYTHON__
INSTALL_STRING=__INSTALL_STRING__

TIME_START=$(date +%s)

Expand Down
4 changes: 0 additions & 4 deletions fractal_server/tasks/v2/templates/_3_pip_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,8 @@ write_log(){

# Variables to be filled within fractal-server
PACKAGE_ENV_DIR=__PACKAGE_ENV_DIR__
PACKAGE_NAME=__PACKAGE_NAME__
PACKAGE=__PACKAGE__
PYTHON=__PYTHON__
INSTALL_STRING=__INSTALL_STRING__


TIME_START=$(date +%s)

VENVPYTHON=${PACKAGE_ENV_DIR}/bin/python
Expand Down
6 changes: 0 additions & 6 deletions fractal_server/tasks/v2/templates/_4_pip_freeze.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,6 @@ write_log(){

# Variables to be filled within fractal-server
PACKAGE_ENV_DIR=__PACKAGE_ENV_DIR__
PACKAGE_NAME=__PACKAGE_NAME__
PACKAGE=__PACKAGE__
PYTHON=__PYTHON__
INSTALL_STRING=__INSTALL_STRING__



VENVPYTHON=${PACKAGE_ENV_DIR}/bin/python

Expand Down
4 changes: 1 addition & 3 deletions fractal_server/tasks/v2/templates/_5_pip_show.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ write_log(){
# Variables to be filled within fractal-server
PACKAGE_ENV_DIR=__PACKAGE_ENV_DIR__
PACKAGE_NAME=__PACKAGE_NAME__
PACKAGE=__PACKAGE__
PYTHON=__PYTHON__
INSTALL_STRING=__INSTALL_STRING__



TIME_START=$(date +%s)
Expand Down
45 changes: 42 additions & 3 deletions tests/v2/00_ssh/test_FractalSSH.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def _send_file(remote: str, lock_timeout: float):

def test_folder_utils(tmp777_path, fractal_ssh: FractalSSH):
"""
Test basic working of `mkdir` and `remove_folder` methods.
Test basic working of `mkdir` and `rename_folder` methods.
"""

# Define folder
Expand All @@ -193,8 +193,9 @@ def test_folder_utils(tmp777_path, fractal_ssh: FractalSSH):
print(stdout)
print()

# Remove folder
fractal_ssh.remove_folder(folder=folder, safe_root="/tmp")
# Rename folder
new_folder = (tmp777_path / "nested/new_folder").as_posix()
fractal_ssh.rename_folder(source=folder, target=new_folder)

# Check that folder does not exist
with pytest.raises(RuntimeError) as e:
Expand Down Expand Up @@ -258,3 +259,41 @@ def test_remove_folder_input_validation():
safe_root="/actual_root",
)
print(e.value)


def test_rename_folder_input_validation():
"""
Test input validation of `remove_folder` method.
"""
fake_fractal_ssh = FractalSSH(connection=Connection(host="localhost"))

# Folders which are just invalid
invalid_folders = [
None,
" /somewhere",
"/ somewhere",
"somewhere",
"$(pwd)",
"`pwd`",
]
for folder in invalid_folders:
with pytest.raises(ValueError) as e:
fake_fractal_ssh.rename_folder(source=folder, target="/something")
print(e.value)
with pytest.raises(ValueError) as e:
fake_fractal_ssh.rename_folder(source="/something", target=folder)
print(e.value)

# Folders which are not in the same parent directory
with pytest.raises(ValueError) as e:
fake_fractal_ssh.rename_folder(
source="/something/a", target="/something/else/b"
)
print(e.value)

with pytest.raises(ValueError) as e:
fake_fractal_ssh.rename_folder(
source="/actual_root/../something",
target="/actual_root",
)
print(e.value)
26 changes: 23 additions & 3 deletions tests/v2/00_ssh/test_task_collection_ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,21 @@ async def test_task_collection_ssh(
FRACTAL_SLURM_SSH_WORKING_BASE_DIR=remote_basedir,
)

# CASE 1: successful collection
state = CollectionStateV2()
db.add(state)
await db.commit()
await db.refresh(state)

task_pkg = _TaskCollectPip(
package="fractal_tasks_core",
package_version="1.0.2",
python_version="3.9",
)

background_collect_pip_ssh(
state_id=state.id,
task_pkg=task_pkg,
fractal_ssh=fractal_ssh,
)

await db.refresh(state)
debug(state)
assert state.data["status"] == "OK"
Expand All @@ -58,6 +56,28 @@ async def test_task_collection_ssh(
debug(venv_dir)
assert venv_dir.is_dir()

# CASE 2: Try collecting the same package again
state = CollectionStateV2()
db.add(state)
await db.commit()
await db.refresh(state)
background_collect_pip_ssh(
state_id=state.id,
task_pkg=task_pkg,
fractal_ssh=fractal_ssh,
)

# Check that the second collection failed, since folder already exists
await db.refresh(state)
debug(state)
assert state.data["status"] == "fail"
assert "already exists" in state.data["log"]
# Check that the remote folder was not removed (note: we can do it on the
# host machine, because /tmp is shared with the container)
venv_dir = Path(remote_basedir) / ".fractal/fractal-tasks-core1.0.2"
debug(venv_dir)
assert venv_dir.is_dir()


async def test_task_collection_ssh_failure(
fractal_ssh: FractalSSH,
Expand Down
Loading