diff --git a/src/phenopacket_mapper/utils/io/read_xml.py b/src/phenopacket_mapper/utils/io/read_xml.py
index 4ed3a86..c2e7e5d 100644
--- a/src/phenopacket_mapper/utils/io/read_xml.py
+++ b/src/phenopacket_mapper/utils/io/read_xml.py
@@ -55,10 +55,36 @@ def parse_primitive_value(value: str):
     return dict_
 
 
+def remove_at_symbols(dict_: Dict) -> Dict:
+    """Recursively strip the leading "@" from the keys of a parsed XML dictionary.
+
+    xmltodict marks XML attributes with an "@" key prefix by default; only that
+    single leading character is removed. Lists are traversed at any depth,
+    including a list passed as the top-level argument or nested directly inside
+    another list. Any other value is returned unchanged. If stripping makes two
+    keys of one dictionary equal, the later one in iteration order wins.
+
+    :param dict_: dictionary, list or primitive value to clean
+    :return: a new structure with the prefix removed; the input is not mutated
+    """
+    if isinstance(dict_, list):
+        return [remove_at_symbols(item) for item in dict_]
+    if not isinstance(dict_, dict):
+        return dict_
+
+    new_dict = {}
+    for k, v in dict_.items():
+        # Non-string keys cannot carry the prefix; keep them as they are.
+        if isinstance(k, str) and k.startswith("@"):
+            k = k[1:]
+        new_dict[k] = remove_at_symbols(v)
+    return new_dict
+
+
 def parse_xml(file: IOBase) -> Dict:
     """Parse an XML file into a dictionary with inferred types."""
     dict_ = xmltodict.parse(file.read())
     print(f"{dict_=}, {type(dict_)=}")
     dict_ = _post_process_xml_dict(dict_)
+    dict_ = remove_at_symbols(dict_)
 
     return dict_
diff --git a/tests/utils/io/test_read_xml.py b/tests/utils/io/test_read_xml.py
index b9d44f9..b92b0c3 100644
--- a/tests/utils/io/test_read_xml.py
+++ b/tests/utils/io/test_read_xml.py
@@ -1,4 +1,5 @@
 from phenopacket_mapper.utils.io import read_xml
+from phenopacket_mapper.utils.io.read_xml import remove_at_symbols
 import pytest
 from io import StringIO
 
@@ -19,16 +20,53 @@
         ('false', {"boolean": False}),
         ('123', {"array": {"item": [1, 2, 3]}}),
         (''
-         '123'
-         'true'
-         'gold'
-         '123'
-         'bd'
-         'Hello World'
-         '',
-         {"root":{"array": {"item": [1, 2, 3]}, "boolean": True, "color": "gold", "number": 123, "object": {"a": "b", "c": "d"}, "string": "Hello World"}}
-         )
+         ''
+         '1'
+         '2'
+         '3'
+         ''
+         'true'
+         'gold'
+         '123'
+         ''
+         'b'
+         'd'
+         ''
+         'Hello World'
+         '',
+         {
+             "root":{
+                 "array": {
+                     "item": [1, 2, 3]
+                 },
+                 "boolean": True,
+                 "color": "gold",
+                 "number": 123,
+                 "object": {
+                     "a": "b",
+                     "c": "d"
+                 },
+                 "string": "Hello World"
+             }
+         }),
+        ('', {"ItemData": {"ItemOID": "redcap_survey_identifier", "Value": ""}}),
     ],
 )
+# TODO test tags inside tags eg d
 def test_read_xml(inp, expected):
     assert read_xml(StringIO(inp)) == expected
+
+
+@pytest.mark.parametrize(
+    "inp,expected",
+    [
+        ({"@a": "b"}, {"a": "b"}),
+        ({"a": "b"}, {"a": "b"}),
+        ({'@a': {'@b': 'c'}}, {'a': {'b': 'c'}}),
+        ({'a': [{'@b': 'c'}, {'@d': 'e'}]}, {'a': [{'b': 'c'}, {'d': 'e'}]}),
+        ([{'@a': 'b'}], [{'a': 'b'}]),
+        ({'a': [[{'@b': 'c'}]]}, {'a': [[{'b': 'c'}]]}),
+    ]
+)
+def test_remove_at_symbols(inp, expected):
+    assert remove_at_symbols(inp) == expected