From e88afc5be5e46df1439555c6c22d37c5f92d33d8 Mon Sep 17 00:00:00 2001 From: tazend <75485188+tazend@users.noreply.github.com> Date: Thu, 25 Jan 2024 18:01:39 +0100 Subject: [PATCH] Fix a crash in cstr.to_gres_dict when the input is just "gres:gpu" (#334) Slurm allows to pass things like --gres=gpu, which in this case the default value will be just one. However, the char *gres value also really just contains "gres:gpu" when displayed via scontrol, with no indication of the count. The code previously assumed that Slurm internally modifies the char* explicitly to reflect the default count of 1. --- pyslurm/utils/cstr.pyx | 14 ++++++++++++-- tests/unit/test_common.py | 28 ++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/pyslurm/utils/cstr.pyx b/pyslurm/utils/cstr.pyx index 7b0b7c34..412ef5d3 100644 --- a/pyslurm/utils/cstr.pyx +++ b/pyslurm/utils/cstr.pyx @@ -214,6 +214,9 @@ cpdef dict to_gres_dict(char *gres): return {} for item in re.split(",(?=[^,]+?:)", gres_str): + # char *gres might contain just "gres:gpu", without any count. + # If not given, the count is always 1, so default to it. + cnt = typ = "1" # Remove the additional "gres" specifier if it exists if gres_delim in item: @@ -223,15 +226,22 @@ cpdef dict to_gres_dict(char *gres): ":(?=[^:]+?)", item.replace("(", ":", 1).replace(")", "") ) + gres_splitted_len = len(gres_splitted) - name, typ, cnt = gres_splitted[0], gres_splitted[1], 0 + name = gres_splitted[0] + if gres_splitted_len > 1: + typ = gres_splitted[1] # Check if we have a gres type. if typ.isdigit(): cnt = typ typ = None - else: + elif gres_splitted_len > 2: cnt = gres_splitted[2] + else: + # String is somehow malformed, should never happen when the input + # comes from the slurmctld. Ignore if it happens. + continue # Dict Key-Name depends on if we have a gres type or not name_and_typ = f"{name}:{typ}" if typ else name diff --git a/tests/unit/test_common.py b/tests/unit/test_common.py index 4706130f..6bc3f708 100644 --- a/tests/unit/test_common.py +++ b/tests/unit/test_common.py @@ -152,12 +152,32 @@ def test_dict_to_gres_str(self): assert cstr.from_gres_dict("tesla:3,a100:5", "gpu") == expected_str def test_str_to_gres_dict(self): - input_str = "gpu:nvidia-a100:1(IDX:0,1)" - expected = {"gpu:nvidia-a100":{"count": 1, "indexes": "0,1"}} + input_str = "gpu:nvidia-a100:1(IDX:0)" + expected = {"gpu:nvidia-a100": {"count": 1, "indexes": "0"}} assert cstr.to_gres_dict(input_str) == expected - input_str = "gpu:nvidia-a100:1" - expected = {"gpu:nvidia-a100": 1} + input_str = "gpu:nvidia-a100:2(IDX:0,1)" + expected = {"gpu:nvidia-a100": {"count": 2, "indexes": "0,1"}} + assert cstr.to_gres_dict(input_str) == expected + + input_str = "gpu:nvidia-a100:5" + expected = {"gpu:nvidia-a100": 5} + assert cstr.to_gres_dict(input_str) == expected + + input_str = "gpu:nvidia-a100:5,gres:gpu:nvidia-v100:10" + expected = {"gpu:nvidia-a100": 5, "gpu:nvidia-v100": 10} + assert cstr.to_gres_dict(input_str) == expected + + input_str = "gres:gpu:2" + expected = {"gpu": 2} + assert cstr.to_gres_dict(input_str) == expected + + input_str = "gres:gpu" + expected = {"gpu": 1} + assert cstr.to_gres_dict(input_str) == expected + + input_str = "gres:gpu:INVALID_COUNT" + expected = {} assert cstr.to_gres_dict(input_str) == expected def test_gres_from_tres_dict(self):