-
Notifications
You must be signed in to change notification settings - Fork 1
/
regexml.py
71 lines (53 loc) · 1.87 KB
/
regexml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import re
import xml.etree.ElementTree
# Sample regexml document used by main() as demo input.  Its elements, in
# order, are: a literal "<", one-or-more of the character set a..z, a literal
# ">", zero-or-more wildcards, then a literal "<", "/", one-or-more of a..z and
# a literal ">".  The literal "<" / ">" characters are wrapped in CDATA so they
# are not parsed as markup.
_example = '''<?xml version="1.0" ?>
<expression type="regular" dialect="posix"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="regexml.xsd">
<character encoding="utf-8" locale="en-US"><![CDATA[<]]></character>
<repeat type="oneormore">
<characterset>
<character encoding="utf-8" locale="en-US">a</character>
<character encoding="utf-8" locale="en-US">z</character>
</characterset>
</repeat>
<character encoding="utf-8" locale="en-US"><![CDATA[>]]></character>
<repeat type="zeroormore">
<wildcard />
</repeat>
<character encoding="utf-8" locale="en-US"><![CDATA[<]]></character>
<character encoding="utf-8" locale="en-US">/</character>
<repeat type="oneormore">
<characterset>
<character encoding="utf-8" locale="en-US">a</character>
<character encoding="utf-8" locale="en-US">z</character>
</characterset>
</repeat>
<character encoding="utf-8" locale="en-US"><![CDATA[>]]></character>
</expression>
'''
# Maps the ``type`` attribute of a <repeat> element to the regex quantifier
# that emit_node appends after the repeated sub-pattern.
_REPEAT_TYPE_TO_CHAR = {
    'oneormore': '+',
    'zeroormore': '*',
}
def _is_single_atom(node):
    """Return True when *node* emits one regex atom a quantifier can follow directly."""
    tag = node.tag
    if tag in ('characterset', 'wildcard'):
        return True
    if tag == 'character':
        return len(node.text or '') == 1
    return False


def emit_node(node):
    """Translate one regexml element (and its children) into regex source text.

    Supported tags:

    * ``expression``   -- concatenation of all child elements.
    * ``character``    -- the element text, emitted as a literal (regex
      metacharacters are escaped); an empty element emits ``''``.
    * ``characterset`` -- the range ``[first-second]`` built from its first
      two children.
    * ``repeat``       -- its children followed by the quantifier selected by
      the ``type`` attribute via ``_REPEAT_TYPE_TO_CHAR``.
    * ``wildcard``     -- ``.``

    Any other tag emits the empty string.

    Args:
        node: An ``xml.etree.ElementTree.Element``.

    Returns:
        The regex source fragment for *node* as a ``str``.

    Raises:
        IndexError: a ``characterset`` has fewer than two children.
        KeyError: a ``repeat`` has no ``type`` attribute, or its value is not
            a key of ``_REPEAT_TYPE_TO_CHAR``.
    """
    tag = node.tag
    if tag == 'expression':
        return ''.join(emit_node(child) for child in node)
    if tag == 'character':
        # Escape so that text such as "." or "*" matches itself instead of
        # acting as a metacharacter.  ``text`` is None for an empty element,
        # which would otherwise break the ''.join() in the parent.
        return re.escape(node.text or '')
    if tag == 'characterset':
        #TODO: support more advanced ranges
        return '[' + emit_node(node[0]) + '-' + emit_node(node[1]) + ']'
    if tag == 'repeat':
        children = list(node)
        body = ''.join(emit_node(child) for child in children)
        quantifier = _REPEAT_TYPE_TO_CHAR[node.attrib['type']]
        # A quantifier binds only to the atom immediately before it, so a
        # multi-part body (several children, a nested repeat, ...) must be
        # grouped for the repeat to cover all of it.  An empty body is left
        # alone so the invalid pattern is still rejected at compile time.
        if body and not (len(children) == 1 and _is_single_atom(children[0])):
            body = '(?:' + body + ')'
        return body + quantifier
    if tag == 'wildcard':
        return '.'
    return ''
def re_from_xml(xml_str):
    """Compile the regexml document in *xml_str* into a regex pattern object.

    The document is parsed with ElementTree, its root element is translated
    to regex source by ``emit_node``, and that source is handed to
    ``re.compile``.  Errors raised while parsing, translating or compiling are
    not caught here.
    """
    root = xml.etree.ElementTree.fromstring(xml_str)
    pattern_source = emit_node(root)
    #TODO: return matches in XML
    compiled = re.compile(pattern_source)
    return compiled
def main():
    """Compile the bundled ``_example`` document and print the resulting pattern."""
    compiled = re_from_xml(_example)
    print(compiled)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()