Skip to content

Commit

Permalink
🐛 FIX: max_memory_kb usage for direct scheduler (#4825)
Browse files Browse the repository at this point in the history
In most schedulers the `metadata.options.max_memory_kb` input to a `CalcJob`
specifies the physical or "real" memory limit.
However, for the direct scheduler this imposed a virtual limit with `ulimit -v`.
Virtual memory is not equivalent to physical memory; it can often be much larger,
and one cannot always systematically map physical to virtual memory usage.
Therefore, in this commit:

1. In all scheduler plugins the variable `virtual_memory_kb` is renamed to `physical_memory_kb`, reflecting its actual meaning.
2. In the direct scheduler, the `max_memory_kb` limit is ignored,
   since no straightforward way exists to directly limit the physical memory usage.

Co-authored-by: Chris Sewell <chrisj_sewell@hotmail.com>
  • Loading branch information
eimrek and chrisjsewell authored Mar 22, 2021
1 parent 32a6c23 commit 75310e0
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 25 deletions.
13 changes: 2 additions & 11 deletions aiida/schedulers/plugins/direct.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,8 @@ def _get_submit_script_header(self, job_tmpl):
lines.append('exec 2>&1')

if job_tmpl.max_memory_kb:
try:
virtual_memory_kb = int(job_tmpl.max_memory_kb)
if virtual_memory_kb <= 0:
raise ValueError
except ValueError:
raise ValueError(
'max_memory_kb must be '
"a positive integer (in kB)! It is instead '{}'"
''.format((job_tmpl.max_memory_kb))
)
lines.append(f'ulimit -v {virtual_memory_kb}')
self.logger.warning('Physical memory limiting is not supported by the direct scheduler.')

if not job_tmpl.import_sys_environment:
lines.append('env --ignore-environment \\')

Expand Down
8 changes: 4 additions & 4 deletions aiida/schedulers/plugins/lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,18 +401,18 @@ def _get_submit_script_header(self, job_tmpl):
# TODO: check if this is the memory per node # pylint: disable=fixme
if job_tmpl.max_memory_kb:
try:
virtual_memory_kb = int(job_tmpl.max_memory_kb)
if virtual_memory_kb <= 0:
physical_memory_kb = int(job_tmpl.max_memory_kb)
if physical_memory_kb <= 0:
raise ValueError
except ValueError:
raise ValueError(
'max_memory_kb must be '
"a positive integer (in kB)! It is instead '{}'"
''.format((job_tmpl.MaxMemoryKb))
''.format((job_tmpl.max_memory_kb))
)
# The -M option sets a per-process (soft) memory limit for all the
# processes that belong to this job
lines.append(f'#BSUB -M {virtual_memory_kb}')
lines.append(f'#BSUB -M {physical_memory_kb}')

if job_tmpl.custom_scheduler_commands:
lines.append(job_tmpl.custom_scheduler_commands)
Expand Down
6 changes: 3 additions & 3 deletions aiida/schedulers/plugins/pbspro.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,16 @@ def _get_resource_lines(

if max_memory_kb:
try:
virtual_memory_kb = int(max_memory_kb)
if virtual_memory_kb <= 0:
physical_memory_kb = int(max_memory_kb)
if physical_memory_kb <= 0:
raise ValueError
except ValueError:
raise ValueError(
'max_memory_kb must be '
"a positive integer (in kB)! It is instead '{}'"
''.format((max_memory_kb))
)
select_string += f':mem={virtual_memory_kb}kb'
select_string += f':mem={physical_memory_kb}kb'

return_lines.append(f'#PBS -l {select_string}')
return return_lines
8 changes: 4 additions & 4 deletions aiida/schedulers/plugins/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,18 +382,18 @@ def _get_submit_script_header(self, job_tmpl):
# It is the memory per node, not per cpu!
if job_tmpl.max_memory_kb:
try:
virtual_memory_kb = int(job_tmpl.max_memory_kb)
if virtual_memory_kb <= 0:
physical_memory_kb = int(job_tmpl.max_memory_kb)
if physical_memory_kb <= 0:
raise ValueError
except ValueError:
raise ValueError(
'max_memory_kb must be '
"a positive integer (in kB)! It is instead '{}'"
''.format((job_tmpl.MaxMemoryKb))
''.format((job_tmpl.max_memory_kb))
)
# --mem: Specify the real memory required per node in MegaBytes.
# --mem and --mem-per-cpu are mutually exclusive.
lines.append(f'#SBATCH --mem={virtual_memory_kb // 1024}')
lines.append(f'#SBATCH --mem={physical_memory_kb // 1024}')

if job_tmpl.custom_scheduler_commands:
lines.append(job_tmpl.custom_scheduler_commands)
Expand Down
6 changes: 3 additions & 3 deletions aiida/schedulers/plugins/torque.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def _get_resource_lines(

if max_memory_kb:
try:
virtual_memory_kb = int(max_memory_kb)
if virtual_memory_kb <= 0:
physical_memory_kb = int(max_memory_kb)
if physical_memory_kb <= 0:
raise ValueError
except ValueError:
raise ValueError(
Expand All @@ -92,7 +92,7 @@ def _get_resource_lines(
)
# There is always something before, at least the total #
# of nodes
select_string += f',mem={virtual_memory_kb}kb'
select_string += f',mem={physical_memory_kb}kb'

return_lines.append(f'#PBS -l {select_string}')
return return_lines

0 comments on commit 75310e0

Please sign in to comment.