Move CalcJob spec validator to corresponding namespaces

The `CalcJob` process had a single validator defined on the top level input namespace, which validated many ports scattered across the namespace, such as the `metadata.options.parser_name` as well as the `metadata.options.resources`. The validator assumed that all of these ports would always be present, however, this is not necessarily true. The expose functionality allows a wrapping process to expose only part of the namespace but the validator remains the same. To ammeliorate this, the signature of validators is updated to also receive a `context` in the form of a second argument `port` in addition to the value passed to the port. This `port` will be the instance of the port to which the validator is assigned. This allows the validator implementation to first check whether a specific port is present before trying to validate the corresponding value. Note that the `port` will represent the port to which the validator is attached and so it will have no knowledge of any namespace it might be embedded in, as it shouldn't, because that will break the portability of the namespace. The `port` argument being passed to the validator call was introduced in `plumpy==0.14.5` so we upgrade the minimum requirement here. Since that version also requires `pyyaml~=5.1.2` we also update that explicit version in `aiida-core`. Going to `pyyaml==5.2` will break until we fix the serialization and deserialization of process instances that are currently using the `FullLoader` that is no longer allowed to serialize arbitrary python objects as we do.
aiidateam · Jan 22, 2020 · 3a3f88f · 3a3f88f
1 parent 381d941
commit 3a3f88f
Show file tree

Hide file tree

Showing 6 changed files with 38 additions and 35 deletions.
diff --git a/aiida/calculations/plugins/arithmetic/add.py b/aiida/calculations/plugins/arithmetic/add.py
@@ -20,9 +20,9 @@ class ArithmeticAddCalculation(CalcJob):
     @classmethod
     def define(cls, spec):
         super().define(spec)
-        spec.input('metadata.options.input_filename', valid_type=str, default='aiida.in', non_db=True)
-        spec.input('metadata.options.output_filename', valid_type=str, default='aiida.out', non_db=True)
-        spec.input('metadata.options.parser_name', valid_type=str, default='arithmetic.add', non_db=True)
+        spec.inputs['metadata']['options']['parser_name'].default = 'arithmetic.add'
+        spec.inputs['metadata']['options']['input_filename'].default = 'aiida.in'
+        spec.inputs['metadata']['options']['output_filename'].default = 'aiida.out'
         spec.input('x', valid_type=(orm.Int, orm.Float), help='The left operand.')
         spec.input('y', valid_type=(orm.Int, orm.Float), help='The right operand.')
         spec.output('sum', valid_type=(orm.Int, orm.Float), help='The sum of the left and right operand.')

diff --git a/aiida/calculations/plugins/templatereplacer.py b/aiida/calculations/plugins/templatereplacer.py
@@ -63,8 +63,7 @@ class TemplatereplacerCalculation(CalcJob):
     def define(cls, spec):
         # yapf: disable
         super().define(spec)
-        spec.input('metadata.options.parser_name', valid_type=str, default='templatereplacer.doubler',
-            non_db=True)
+        spec.inputs['metadata']['options']['parser_name'].default = 'templatereplacer.doubler'
         spec.input('template', valid_type=orm.Dict,
             help='A template for the input file.')
         spec.input('parameters', valid_type=orm.Dict, required=False,

diff --git a/aiida/engine/processes/calcjobs/calcjob.py b/aiida/engine/processes/calcjobs/calcjob.py
@@ -24,24 +24,27 @@
 __all__ = ('CalcJob',)
 
 
-def validate_calc_job(inputs):
+def validate_calc_job(inputs, ctx):
     """Validate the entire set of inputs passed to the `CalcJob` constructor.
 
     Reasons that will cause this validation to raise an `InputValidationError`:
 
      * No `Computer` has been specified, neither directly in `metadata.computer` nor indirectly through the `Code` input
      * The specified computer is not stored
      * The `Computer` specified in `metadata.computer` is not the same as that of the specified `Code`
-     * The `metadata.options.parser_name` does not correspond to a loadable `Parser` class.
-     * The `metadata.options.resources` are invalid for the `Scheduler` of the specified `Computer`.
 
     :raises `~aiida.common.exceptions.InputValidationError`: if inputs are invalid
     """
-    from aiida.plugins import ParserFactory
+    try:
+        ctx.get_port('code')
+        ctx.get_port('metadata.computer')
+    except ValueError:
+        # If the namespace no longer contains the `code` or `metadata.computer` ports we skip validation
+        return
 
-    code = inputs['code']
+    code = inputs.get('code', None)
     computer_from_code = code.computer
-    computer_from_metadata = inputs['metadata'].get('computer', None)
+    computer_from_metadata = inputs.get('metadata', {}).get('computer', None)
 
     if not computer_from_code and not computer_from_metadata:
         raise exceptions.InputValidationError('no computer has been specified in `metadata.computer` nor via `code`.')
@@ -60,31 +63,32 @@ def validate_calc_job(inputs):
             )
         )
 
-    # At this point at least one computer is defined and if both they are the same so we just pick one
-    computer = computer_from_metadata if computer_from_metadata else computer_from_code
 
-    options = inputs['metadata'].get('options', {})
-    parser_name = options.get('parser_name', None)
-    resources = options.get('resources', None)
+def validate_parser(parser_name, ctx):
+    """Validate the parser.
+
+    :raises InputValidationError: if the parser name does not correspond to a loadable `Parser` class.
+    """
+    from aiida.plugins import ParserFactory
 
-    if parser_name is not None:
+    if parser_name is not plumpy.UNSPECIFIED:
         try:
             ParserFactory(parser_name)
         except exceptions.EntryPointError as exception:
             raise exceptions.InputValidationError('invalid parser specified: {}'.format(exception))
 
-    scheduler = computer.get_scheduler()  # pylint: disable=no-member
-    def_cpus_machine = computer.get_default_mpiprocs_per_machine()  # pylint: disable=no-member
 
-    if def_cpus_machine is not None:
-        resources['default_mpiprocs_per_machine'] = def_cpus_machine
+def validate_resources(resources, ctx):
+    """Validate the resources.
 
-    try:
-        scheduler.create_job_resource(**resources)
-    except (TypeError, ValueError) as exception:
-        raise exceptions.InputValidationError(
-            'invalid resources for the scheduler of the specified computer<{}>: {}'.format(computer, exception)
-        )
+    :raises InputValidationError: if `num_machines` is not specified or is not an integer.
+    """
+    if resources is not plumpy.UNSPECIFIED:
+        if 'num_machines' not in resources:
+            raise exceptions.InputValidationError('the `resources` input has to at least include `num_machines`.')
+
+        if not isinstance(resources['num_machines'], int):
+            raise exceptions.InputValidationError('the input `resources.num_machines` shoud be an integer.')
 
 
 class CalcJob(Process):
@@ -125,7 +129,7 @@ def define(cls, spec):
             help='Filename to which the content of stdout of the scheduler will be written.')
         spec.input('metadata.options.scheduler_stderr', valid_type=str, default='_scheduler-stderr.txt',
             help='Filename to which the content of stderr of the scheduler will be written.')
-        spec.input('metadata.options.resources', valid_type=dict, required=True,
+        spec.input('metadata.options.resources', valid_type=dict, required=True, validator=validate_resources,
             help='Set the dictionary of resources to be used by the scheduler plugin, like the number of nodes, '
                  'cpus etc. This dictionary is scheduler-plugin dependent. Look at the documentation of the '
                  'scheduler for more details.')
@@ -161,7 +165,7 @@ def define(cls, spec):
         spec.input('metadata.options.append_text', valid_type=str, default='',
             help='Set the calculation-specific append text, which is going to be appended in the scheduler-job '
                  'script, just after the code execution',)
-        spec.input('metadata.options.parser_name', valid_type=str, required=False,
+        spec.input('metadata.options.parser_name', valid_type=str, required=False, validator=validate_parser,
             help='Set a string for the output parser. Can be None if no output plugin is available or needed')
 
         spec.output('remote_folder', valid_type=orm.RemoteData,

diff --git a/docs/requirements_for_rtd.txt b/docs/requirements_for_rtd.txt
@@ -23,7 +23,7 @@ paramiko~=2.6
 pg8000~=1.13
 pgtest~=1.3,>=1.3.1
 pika~=1.1
-plumpy~=0.14.3
+plumpy~=0.14.5
 psutil~=5.6
 psycopg2-binary~=2.8,>=2.8.3
 pyblake2~=1.1; python_version<'3.6'
@@ -36,7 +36,7 @@ pytest~=5.3
 python-dateutil~=2.8
 python-memcached~=1.59
 pytz~=2019.3
-pyyaml~=5.1
+pyyaml~=5.1.2
 reentry~=1.3
 seekpath~=1.9,>=1.9.3
 simplejson~=3.16

diff --git a/environment.yml b/environment.yml
@@ -22,12 +22,12 @@ dependencies:
 - numpy~=1.17,<1.18
 - paramiko~=2.6
 - pika~=1.1
-- plumpy~=0.14.3
+- plumpy~=0.14.5
 - psutil~=5.6
 - psycopg2~=2.8,>=2.8.3
 - python-dateutil~=2.8
 - pytz~=2019.3
-- pyyaml~=5.1
+- pyyaml~=5.1.2
 - reentry~=1.3
 - simplejson~=3.16
 - sqlalchemy-utils~=0.34.2

diff --git a/setup.json b/setup.json
@@ -36,13 +36,13 @@
     "numpy~=1.17,<1.18",
     "paramiko~=2.6",
     "pika~=1.1",
-    "plumpy~=0.14.3",
+    "plumpy~=0.14.5",
     "psutil~=5.6",
     "psycopg2-binary~=2.8,>=2.8.3",
     "pyblake2~=1.1; python_version<'3.6'",
     "python-dateutil~=2.8",
     "pytz~=2019.3",
-    "pyyaml~=5.1",
+    "pyyaml~=5.1.2",
     "reentry~=1.3",
     "simplejson~=3.16",
     "sqlalchemy-utils~=0.34.2",