Skip to content

Commit

Permalink
Engine: Ignore failing process state change for core.sqlite_dos
Browse files Browse the repository at this point in the history
For each process state change, the engine calls the utility function
`aiida.engine.utils.set_process_state_change_timestamp`. This calls
`set_global_variable` on the storage plugin to update the
`process|state_change|.*` key in the settings table. This value is used
in `verdi process list` to show the last time a process changed its
state, which serves as a proxy for daemon activity.

When multiple processes are running, this call can throw an
exception for the `core.sqlite_dos` storage plugin. This is because
SQLite does not support concurrent writes that touch the same page,
which is the case when multiple writes update the same row.

Since updating the timestamp is not crucial for AiiDA to function
properly — especially because the failure means another process was
attempting the very same update — it is safe to ignore the failed
update and simply log it as a warning.
  • Loading branch information
sphuber committed Jul 24, 2024
1 parent 5611dda commit 1b8c58b
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 1 deletion.
14 changes: 13 additions & 1 deletion src/aiida/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ def set_process_state_change_timestamp(node: 'ProcessNode') -> None:
:param process: the Process instance that changed its state
"""
from sqlalchemy.exc import OperationalError

from aiida.common import timezone
from aiida.manage import get_manager
from aiida.orm import CalculationNode, ProcessNode, WorkflowNode
Expand All @@ -287,7 +289,17 @@ def set_process_state_change_timestamp(node: 'ProcessNode') -> None:
value = timezone.now().isoformat()

backend = get_manager().get_profile_storage()
backend.set_global_variable(key, value, description)

try:
backend.set_global_variable(key, value, description)
except OperationalError:
# This typically happens for SQLite-based storage plugins like ``core.sqlite_dos``. Since this is merely an
# update of a timestamp that is likely to be updated soon again, just ignoring the failed update is not a
# problem.
LOGGER.warning(
f'Failed to write global variable `{key}` to `{value}` because the database was locked. If the storage '
'plugin being used is `core.sqlite_dos` this is to be expected and can be safely ignored.'
)


def get_process_state_change_timestamp(process_type: Optional[str] = None) -> Optional[datetime]:
Expand Down
52 changes: 52 additions & 0 deletions tests/engine/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,19 @@
"""Test engine utilities such as the exponential backoff mechanism."""

import asyncio
import contextlib

import pytest
from aiida import orm
from aiida.engine import calcfunction, workfunction
from aiida.engine.utils import (
InterruptableFuture,
exponential_backoff_retry,
get_process_state_change_timestamp,
instantiate_process,
interruptable_task,
is_process_function,
set_process_state_change_timestamp,
)

ITERATION = 0
Expand Down Expand Up @@ -225,3 +228,52 @@ async def coro():

result = await task_fut
assert result == 'NOT ME!!!'


@pytest.mark.parametrize('with_transaction', (True, False))
@pytest.mark.parametrize('monkeypatch_process_state_change', (True, False))
def test_set_process_state_change_timestamp(manager, with_transaction, monkeypatch_process_state_change, monkeypatch):
    """Test :func:`aiida.engine.utils.set_process_state_change_timestamp`.

    With the ``core.sqlite_dos`` storage plugin, this function can raise an ``OperationalError`` when multiple
    processes are run: each state change updates the same row in the settings table, and SQLite does not allow
    concurrent writes to the same page, so the database reports itself as locked. The exception is caught and
    ignored inside ``set_process_state_change_timestamp``. This test ensures that when that happens, other changes
    made alongside it (e.g. setting an extra on a node) are not accidentally reverted — both with and without an
    explicit surrounding transaction.
    """
    storage = manager.get_profile_storage()

    key = 'some_key'
    value = 'some value'
    node = orm.CalculationNode().store()

    # Record an initial process state change timestamp so the end of the test can detect whether it changed.
    set_process_state_change_timestamp(node)
    timestamp_before = get_process_state_change_timestamp()
    assert timestamp_before is not None

    if monkeypatch_process_state_change:

        def raise_operational_error(*_, **__):
            from sqlalchemy.exc import OperationalError

            raise OperationalError('monkey failure', None, '', '')

        # Simulate the locked-database failure mode of ``core.sqlite_dos``.
        monkeypatch.setattr(storage, 'set_global_variable', raise_operational_error)

    if with_transaction:
        context_factory = storage.transaction
    else:
        context_factory = contextlib.nullcontext

    with context_factory():
        node.base.extras.set(key, value)
        set_process_state_change_timestamp(node)

    # The extra must have been persisted regardless of whether updating the timestamp failed.
    assert node.base.extras.get(key) == value

    # The timestamp only changes when the storage plugin was not monkeypatched to fail.
    timestamp_after = get_process_state_change_timestamp()
    if monkeypatch_process_state_change:
        assert timestamp_after == timestamp_before
    else:
        assert timestamp_after != timestamp_before

0 comments on commit 1b8c58b

Please sign in to comment.