Skip to content

Commit

Permalink
Merge pull request #176 from BIH-CEI/175-remove-symbols-when-reading-…
Browse files Browse the repository at this point in the history
…xmls

added remove at symbols and tested it
  • Loading branch information
frehburg authored Oct 15, 2024
2 parents cb9221f + 7b7a8c0 commit 73c8c5a
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 9 deletions.
17 changes: 17 additions & 0 deletions src/phenopacket_mapper/utils/io/read_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,27 @@ def parse_primitive_value(value: str):
return dict_


def remove_at_symbols(dict_: Dict) -> Dict:
    """Return a copy of ``dict_`` with one leading ``@`` stripped from every key.

    The structure is walked recursively: dictionaries are rebuilt with cleaned
    keys, lists are rebuilt element by element (including lists nested directly
    inside other lists and a list passed as the top-level argument), and any
    other value is returned unchanged. The input itself is never mutated.

    Only the first ``@`` of a key is removed (``"@@a"`` becomes ``"@a"``), and
    keys that are not strings are kept as they are.

    NOTE: if a dictionary contains both ``"@x"`` and ``"x"``, both map to the
    same cleaned key and the entry that comes later in iteration order wins.

    :param dict_: dictionary (or list / scalar reached while recursing) to clean
    :return: a new structure of the same shape without the leading ``@`` in keys
    """
    # Lists are handled here rather than at the call site so that nesting of
    # any depth (list in list, top-level list) is cleaned the same way.
    if isinstance(dict_, list):
        return [remove_at_symbols(item) for item in dict_]
    if not isinstance(dict_, dict):
        return dict_

    new_dict = {}
    for k, v in dict_.items():
        # Guard the type: only string keys can carry the "@" prefix.
        if isinstance(k, str) and k.startswith("@"):
            k = k[1:]
        new_dict[k] = remove_at_symbols(v)
    return new_dict


def parse_xml(file: IOBase) -> Dict:
    """Parse an XML file into a dictionary with inferred types.

    The whole content of ``file`` is read and converted with ``xmltodict``,
    the result is passed through ``_post_process_xml_dict`` and finally the
    leading ``@`` in front of attribute keys is removed with
    ``remove_at_symbols``.

    :param file: readable file-like object containing the XML document
    :return: the parsed document as a nested dictionary
    """
    dict_ = xmltodict.parse(file.read())
    dict_ = _post_process_xml_dict(dict_)
    dict_ = remove_at_symbols(dict_)
    return dict_

54 changes: 45 additions & 9 deletions tests/utils/io/test_read_xml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from phenopacket_mapper.utils.io import read_xml
from phenopacket_mapper.utils.io.read_xml import remove_at_symbols

import pytest
from io import StringIO
Expand All @@ -19,16 +20,51 @@
('<boolean>false</boolean>', {"boolean": False}),
('<array><item>1</item><item>2</item><item>3</item></array>', {"array": {"item": [1, 2, 3]}}),
('<root>'
'<array><item>1</item><item>2</item><item>3</item></array>'
'<boolean>true</boolean>'
'<color>gold</color>'
'<number>123</number>'
'<object><a>b</a><c>d</c></object>'
'<string>Hello World</string>'
'</root>',
{"root":{"array": {"item": [1, 2, 3]}, "boolean": True, "color": "gold", "number": 123, "object": {"a": "b", "c": "d"}, "string": "Hello World"}}
)
'<array>'
'<item>1</item>'
'<item>2</item>'
'<item>3</item>'
'</array>'
'<boolean>true</boolean>'
'<color>gold</color>'
'<number>123</number>'
'<object>'
'<a>b</a>'
'<c>d</c>'
'</object>'
'<string>Hello World</string>'
'</root>',
{
"root":{
"array": {
"item": [1, 2, 3]
},
"boolean": True,
"color": "gold",
"number": 123,
"object": {
"a": "b",
"c": "d"
},
"string": "Hello World"
}
}),
('<ItemData ItemOID="redcap_survey_identifier" Value=""/>', {"ItemData": {"ItemOID": "redcap_survey_identifier", "Value": ""}}),
],
)
# TODO: test elements that have both attributes and text content, e.g. <a b="c">d</a>
def test_read_xml(inp, expected):
assert read_xml(StringIO(inp)) == expected


@pytest.mark.parametrize(
    "inp,expected",
    [
        # a prefixed top-level key loses its "@"
        ({"@a": "b"}, {"a": "b"}),
        # keys without the prefix are left alone
        ({"a": "b"}, {"a": "b"}),
        # nested dictionaries are cleaned as well
        ({"@a": {"@b": "c"}}, {"a": {"b": "c"}}),
        # dictionaries inside a list value are cleaned as well
        ({"a": [{"@b": "c"}, {"@d": "e"}]}, {"a": [{"b": "c"}, {"d": "e"}]}),
    ],
)
def test_remove_at_symbols(inp, expected):
    """Check that ``remove_at_symbols`` strips the leading "@" from keys."""
    cleaned = remove_at_symbols(inp)
    assert cleaned == expected

0 comments on commit 73c8c5a

Please sign in to comment.