-
Notifications
You must be signed in to change notification settings - Fork 0
/
apertiumBidict.py
46 lines (40 loc) · 1.65 KB
/
apertiumBidict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#! /usr/bin/python3
import xml.etree.ElementTree as ET
from word import Word, WordPair
class ApertiumBidict:
    """Look up the counterpart(s) of a word in an Apertium bilingual dictionary.

    The dictionary file is parsed once into an ElementTree.  Lookups search
    ``<p>`` elements (matching on their ``<l>`` or ``<r>`` child, depending on
    the direction) and ``<e>`` elements that carry an ``<i>`` child.
    """

    def __init__(self, sLang, tLang, file):
        """Parse the dictionary.

        sLang -- language associated with the ``<l>`` side of each pair.
        tLang -- language associated with the ``<r>`` side of each pair.
        file  -- filename or file object, passed straight to ``ET.parse``.
        """
        self.sLang = sLang
        self.tLang = tLang
        self.tree = ET.parse(file)

    @staticmethod
    def processNode(node, lang):
        """Build a ``Word`` from a dictionary node.

        The category is the ``n`` attribute of the first ``<s>`` descendant of
        *node*; when there is no ``<s>`` descendant the literal string
        ``"None"`` is used.  The surface form comes from ``nodeToString``.
        """
        firstSymbol = node.find(".//s")
        if firstSymbol is None:
            category = "None"
        else:
            category = firstSymbol.get("n")
        surface = ApertiumBidict.nodeToString(node)
        return Word(surface, category, lang)

    @staticmethod
    def nodeToString(node):
        """Return the textual content of *node*, fragments joined by ``<b/>``.

        Every element that contributes text yields one fragment made of its
        own text followed by its tail.  The tail of *node* itself is skipped
        because that text sits after the closing tag, i.e. outside *node*.
        """
        fragments = []
        for elem in node.iter():
            text = elem.text or ""
            # Parenthesised on purpose: an unparenthesised
            # `a if a else "" + b if b else ""` would drop the tail whenever
            # the element also has text.
            tail = "" if elem is node else (elem.tail or "")
            if text or tail:
                fragments.append(text + tail)
        return "<b/>".join(fragments)

    @staticmethod
    def _hasChildWithText(elem, tag, text):
        """Tell whether *elem* has a direct *tag* child whose full text is *text*.

        "Full text" is the concatenation of the child's own and descendant
        text, which is what ElementTree's ``[tag='text']`` predicate compares.
        Doing the comparison here avoids splicing *text* into a path
        expression, so apostrophes or brackets in *text* are harmless.
        """
        return any(
            "".join(child.itertext()) == text for child in elem.findall(tag)
        )

    def reel(self, word):
        """Return a list of ``WordPair`` objects for *word*.

        An empty list is returned when ``word.lang`` is neither of the
        dictionary's languages, or when ``word.surface`` contains ``<``
        (multiwords are skipped).  Matches found through ``<p>`` elements come
        first, followed by matches found through ``<e>``/``<i>`` elements.
        """
        if word.lang == self.sLang:
            sourceTag, targetTag, otherLang = "l", "r", self.tLang
        elif word.lang == self.tLang:
            sourceTag, targetTag, otherLang = "r", "l", self.sLang
        else:
            return []

        surface = word.surface
        if "<" in surface:
            return []  # Skip multiwords

        matches = []
        for pair in self.tree.findall(".//p"):
            if self._hasChildWithText(pair, sourceTag, surface):
                matches.extend(pair.findall(targetTag))
        # <i> entries are looked up the same way in both directions.
        for entry in self.tree.findall(".//e"):
            if self._hasChildWithText(entry, "i", surface):
                matches.extend(entry.findall("i"))

        return [
            WordPair(word, ApertiumBidict.processNode(node, otherLang))
            for node in matches
        ]

    def __str__(self):
        """Return ``"<sLang>|<tLang>"``."""
        return self.sLang + "|" + self.tLang