Write out a job script for suite and test cases during setup #376

Merged · 9 commits · Apr 28, 2022
21 changes: 21 additions & 0 deletions compass/default.cfg
@@ -36,3 +36,24 @@
format = NETCDF3_64BIT
# required
engine = scipy


# Config options related to creating a job script
[job]

# the name of the parallel job
job_name = <<<default>>>

# wall-clock time
wall_time = 1:00:00

# The job partition to use; by default, taken from the first partition (if
# any) provided for the machine by mache
partition = <<<default>>>

# The job quality of service (QOS) to use; by default, taken from the first
# qos (if any) provided for the machine by mache
qos = <<<default>>>

# The job constraint to use; by default, taken from the first constraint (if
# any) provided for the machine by mache
constraint = <<<default>>>
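
The ``<<<default>>>`` values are sentinels that ``write_job_script()`` (added
below) resolves at setup time from the info mache provides for the machine.
As a sketch, a user config file could override them with concrete values (the
values here are hypothetical):

.. code-block:: cfg

    [job]
    # a custom name for the batch job
    job_name = compass_nightly
    # request more wall-clock time than the 1-hour default
    wall_time = 2:00:00
    partition = acme-medium
    qos = regular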
122 changes: 122 additions & 0 deletions compass/job/__init__.py
@@ -0,0 +1,122 @@
from jinja2 import Template
from importlib import resources
import os
import numpy as np


def write_job_script(config, machine, target_cores, min_cores, work_dir,
                     suite=''):
    """
    Write a job script for running a test case or test suite.

    Parameters
    ----------
    config : compass.config.CompassConfigParser
        Configuration options for this test case, a combination of user
        configs and the defaults for the machine and MPAS core

    machine : {str, None}
        The name of the machine

    target_cores : int
        The target number of cores for the job to use

    min_cores : int
        The minimum number of cores for the job to use

    work_dir : str
        The work directory where the job script should be written

    suite : str, optional
        The name of the suite
    """

    if config.has_option('parallel', 'account'):
        account = config.get('parallel', 'account')
    else:
        account = ''

    cores_per_node = config.getint('parallel', 'cores_per_node')

    # as a rule of thumb, let's do the geometric mean between min and target
    cores = np.sqrt(target_cores*min_cores)
    nodes = int(np.ceil(cores/cores_per_node))
Comment on lines +40 to +42
Collaborator (author):
The geometric mean seems to be a good guess for what folks might want but I'm up for other suggestions for how to handle the node count.
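
To make the rule of thumb concrete: with ``min_cores = 128`` and
``target_cores = 512``, the geometric mean is ``sqrt(128 * 512) = 256``
cores, so a machine with 36 cores per node would get ``ceil(256 / 36) = 8``
nodes. A minimal sketch (the numbers are illustrative):

.. code-block:: python

    import numpy as np

    target_cores = 512
    min_cores = 128
    cores_per_node = 36

    # geometric mean between the minimum and target core counts
    cores = np.sqrt(target_cores * min_cores)  # 256.0
    nodes = int(np.ceil(cores / cores_per_node))  # 8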


    partition = config.get('job', 'partition')
    if partition == '<<<default>>>':
        if machine == 'anvil':
            # choose the partition based on the number of nodes
            if nodes <= 5:
                partition = 'acme-small'
            elif nodes <= 60:
                partition = 'acme-medium'
            else:
                partition = 'acme-large'
        elif config.has_option('parallel', 'partitions'):
            # get the first, which is the default
            partition = config.getlist('parallel', 'partitions')[0]
        else:
            partition = ''

    qos = config.get('job', 'qos')
    if qos == '<<<default>>>':
        if config.has_option('parallel', 'qos'):
            # get the first, which is the default
            qos = config.getlist('parallel', 'qos')[0]
        else:
            qos = ''

    constraint = config.get('job', 'constraint')
    if constraint == '<<<default>>>':
        if config.has_option('parallel', 'constraints'):
            # get the first, which is the default
            constraint = config.getlist('parallel', 'constraints')[0]
        else:
            constraint = ''

    job_name = config.get('job', 'job_name')
    if job_name == '<<<default>>>':
        if suite == '':
            job_name = 'compass'
        else:
            job_name = f'compass_{suite}'
    wall_time = config.get('job', 'wall_time')

    template = Template(resources.read_text(
        'compass.job', 'template.sh'))

    text = template.render(job_name=job_name, account=account,
                           nodes=f'{nodes}', wall_time=wall_time, qos=qos,
                           partition=partition, constraint=constraint,
                           suite=suite)
    text = _clean_up_whitespace(text)
    if suite == '':
        script_filename = 'compass_job_script.sh'
    else:
        script_filename = f'compass_job_script.{suite}.sh'
    script_filename = os.path.join(work_dir, script_filename)
    with open(script_filename, 'w') as handle:
        handle.write(text)


def _clean_up_whitespace(text):
    prev_line = None
    lines = text.split('\n')
    trimmed = list()
    # remove extra blank lines
    for line in lines:
        if line != '' or prev_line != '':
            trimmed.append(line)
        prev_line = line

    line = ''
    lines = list()
    # remove blank lines between comments
    for next_line in trimmed:
        if line != '' or not next_line.startswith('#'):
            lines.append(line)
        line = next_line

    # add the last line that we missed and an extra blank line
    lines.extend([trimmed[-1], ''])
    text = '\n'.join(lines)
    return text
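
To illustrate ``_clean_up_whitespace()``, here is a hypothetical
before-and-after: runs of blank lines collapse to one, and a blank line
directly before a ``#`` comment line is dropped, so the rendered ``#SBATCH``
block comes out contiguous:

.. code-block:: python

    text = ('#!/bin/bash\n'
            '\n'
            '\n'
            '#SBATCH --nodes=2\n'
            '\n'
            '#SBATCH --time=1:00:00\n')
    print(_clean_up_whitespace(text))
    # #!/bin/bash
    # #SBATCH --nodes=2
    # #SBATCH --time=1:00:00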
22 changes: 22 additions & 0 deletions compass/job/template.sh
@@ -0,0 +1,22 @@
#!/bin/bash
#SBATCH --job-name={{ job_name }}
{% if account != '' -%}
#SBATCH --account={{ account }}
{%- endif %}
#SBATCH --nodes={{ nodes }}
#SBATCH --output={{ job_name }}.o%j
#SBATCH --exclusive
#SBATCH --time={{ wall_time }}
{% if qos != '' -%}
#SBATCH --qos={{ qos }}
{%- endif %}
{% if partition != '' -%}
#SBATCH --partition={{ partition }}
{%- endif %}
{% if constraint != '' -%}
#SBATCH --constraint={{ constraint }}
{%- endif %}

source load_compass_env.sh
compass run {{ suite }}
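
As a sketch of how this template behaves when rendered (the values here are
hypothetical), each ``{% if ... %}``/``{%- endif %}`` guard drops its
``#SBATCH`` line entirely when the value is empty, and
``_clean_up_whitespace()`` then removes the blank lines the guards leave
behind:

.. code-block:: python

    from jinja2 import Template

    with open('template.sh') as handle:
        template = Template(handle.read())
    # empty strings cause the guarded #SBATCH lines to be omitted
    text = template.render(job_name='compass', account='', nodes='2',
                           wall_time='1:00:00', qos='regular',
                           partition='', constraint='', suite='')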

14 changes: 14 additions & 0 deletions compass/machines/badger.cfg
@@ -35,3 +35,17 @@
spack = /usr/projects/climate/SHARED_CLIMATE/compass/badger/spack
# whether to use the same modules for hdf5, netcdf-c, netcdf-fortran and
# pnetcdf as E3SM (spack modules are used otherwise)
use_e3sm_hdf5_netcdf = False


# The parallel section describes options related to running jobs in parallel
[parallel]

# account for running compass jobs
account =


# Config options related to creating a job script
[job]

# The job quality of service (QOS) to use
qos =
8 changes: 8 additions & 0 deletions compass/machines/cori-haswell.cfg
@@ -38,3 +38,11 @@
use_e3sm_hdf5_netcdf = True

# the version of ESMF to build if using system compilers and MPI (don't build)
esmf = None


# Config options related to creating a job script
[job]

# The job constraint to use; by default, taken from the first constraint (if
# any) provided for the machine by mache
constraint = haswell
14 changes: 12 additions & 2 deletions compass/setup.py
@@ -10,6 +10,7 @@
from compass.config import CompassConfigParser
from compass.io import symlink
from compass import provenance
from compass.job import write_job_script


@@ -163,8 +164,12 @@ def setup_cases(tests=None, numbers=None, config_file=None, machine=None,

    max_cores, max_of_min_cores = _get_required_cores(test_cases)

-    print('target cores: {}'.format(max_cores))
-    print('minimum cores: {}'.format(max_of_min_cores))
+    print(f'target cores: {max_cores}')
+    print(f'minimum cores: {max_of_min_cores}')

    if machine is not None:
        write_job_script(basic_config, machine, max_cores, max_of_min_cores,
                         work_dir, suite=suite_name)

    return test_cases

@@ -310,6 +315,11 @@ def setup_case(path, test_case, config_file, machine, work_dir, baseline_dir,
    symlink(script_filename, os.path.join(test_case_dir,
                                          'load_compass_env.sh'))

    if machine is not None:
        max_cores, max_of_min_cores = _get_required_cores({path: test_case})
        write_job_script(config, machine, max_cores, max_of_min_cores,
                         test_case_dir)


def main():
    parser = argparse.ArgumentParser(
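``_get_required_cores()`` itself is not part of this diff. For orientation
only, a hypothetical sketch of what it computes, assuming each test case
exposes steps with ``cores`` and ``min_cores`` attributes:

.. code-block:: python

    def _get_required_cores(test_cases):
        """ Hypothetical sketch -- the real helper is not shown here """
        max_cores = 0
        max_of_min_cores = 0
        for test_case in test_cases.values():
            for step in test_case.steps.values():
                max_cores = max(max_cores, step.cores)
                max_of_min_cores = max(max_of_min_cores, step.min_cores)
        return max_cores, max_of_min_cores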
63 changes: 63 additions & 0 deletions docs/users_guide/quick_start.rst
@@ -332,6 +332,69 @@
should contain a file ``test_case.pickle`` that contains the information
``load_compass_env.sh`` is a link to whatever load script you sourced before
setting up the test case (see :ref:`conda_env`).

Running with a job script
-------------------------

Alternatively, on supported machines, you can run the test case or suite with
a job script generated automatically during setup, for example:

.. code-block:: bash

    cd <workdir>/<test_subdir>
    sbatch compass_job_script.sh

You can edit the job script to change the wall-clock time (1 hour by default)
or the number of nodes (by default, scaled according to the number of cores
required by the test cases).

.. code-block:: bash

    #!/bin/bash
    #SBATCH --job-name=compass
    #SBATCH --account=condo
    #SBATCH --nodes=5
    #SBATCH --output=compass.o%j
    #SBATCH --exclusive
    #SBATCH --time=1:00:00
    #SBATCH --qos=regular
    #SBATCH --partition=acme-small

    source load_compass_env.sh
    compass run

You can also control the job script with config options, passed to
``compass suite`` or ``compass setup`` in a user config file with ``-f``.
The following config options are relevant to job scripts:

.. code-block:: cfg

    # The parallel section describes options related to running jobs in parallel
    [parallel]

    # account for running compass jobs
    account = condo


    # Config options related to creating a job script
    [job]

    # the name of the parallel job
    job_name = compass

    # wall-clock time
    wall_time = 1:00:00

    # The job partition to use; by default, taken from the first partition (if
    # any) provided for the machine by mache
    partition = acme-small

    # The job quality of service (QOS) to use; by default, taken from the first
    # qos (if any) provided for the machine by mache
    qos = regular

    # The job constraint to use; by default, taken from the first constraint (if
    # any) provided for the machine by mache
    constraint =
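
For example, a user config file like the one above (saved as, say,
``my_job.cfg``) can be applied at setup time; the file name and test number
here are hypothetical:

.. code-block:: bash

    compass setup -n 90 -f my_job.cfg -w <workdir>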


.. _suite_overview:

Test Suites