-
Notifications
You must be signed in to change notification settings - Fork 0
/
apertiumBidictMemory.py
52 lines (45 loc) · 1.69 KB
/
apertiumBidictMemory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import xml.etree.ElementTree as ET
from word import Word, WordPair
from trie import Fixtree
class ApertiumBidict:
    """Look up the counterpart(s) of a word in an Apertium bilingual dictionary.

    The dictionary XML file is parsed once at construction time and two
    ``Fixtree`` indexes are filled from it: ``sTree`` is keyed by the surface
    of the source-language words and stores the target-language ``Word``
    objects, ``tTree`` is keyed the other way round.
    """

    def __init__(self, sLang, tLang, fileName):
        """Build both lookup trees from the dictionary at *fileName*.

        Args:
            sLang: language identifier attached to words read from ``<l>``.
            tLang: language identifier attached to words read from ``<r>``.
            fileName: XML source handed to ``xml.etree.ElementTree.parse``.
        """
        self.sLang = sLang
        self.tLang = tLang
        self.sTree = Fixtree()
        self.tTree = Fixtree()

        # Parse the file a single time; both passes below walk the same tree.
        document = ET.parse(fileName)

        # <p> entries hold a distinct left (<l>) and right (<r>) side.
        for pair in document.iterfind(".//p"):
            sWord = ApertiumBidict.processNode(pair.find("l"), sLang)
            tWord = ApertiumBidict.processNode(pair.find("r"), tLang)
            self._addPair(sWord, tWord)

        # <i> entries are read as both sides: the same node yields the
        # source-language word and the target-language word.
        for identity in document.iterfind(".//i"):
            sWord = ApertiumBidict.processNode(identity, sLang)
            tWord = ApertiumBidict.processNode(identity, tLang)
            self._addPair(sWord, tWord)

    def _addPair(self, sWord, tWord):
        """Register *sWord* and *tWord* as counterparts in both trees."""
        self.sTree.addData(sWord.surface, tWord)
        self.tTree.addData(tWord.surface, sWord)

    @staticmethod
    def processNode(node, lang):
        """Turn one dictionary side into a ``Word``.

        The category is the ``n`` attribute of the first ``<s>`` descendant
        of *node*; when there is no ``<s>`` the literal string ``"None"`` is
        used. The word text comes from :meth:`nodeToString`.
        """
        firstSymbol = node.find(".//s")
        if firstSymbol is None:
            nodecat = "None"
        else:
            nodecat = firstSymbol.get("n")
        return Word(ApertiumBidict.nodeToString(node), nodecat, lang)

    @staticmethod
    def nodeToString(node: ET.Element) -> str:
        """Flatten the character data inside *node* into a single string.

        Every element under *node* (and *node* itself) contributes its text
        followed by its tail; elements contributing nothing are skipped and
        the remaining pieces are joined with the literal ``"<b/>"``.

        The tail of *node* itself is left out: in ElementTree the tail is the
        text that follows the element's end tag, so it belongs to the parent,
        not to the node being flattened.
        """
        pieces = []
        for element in node.iter():
            text = element.text or ""
            tail = "" if element is node else (element.tail or "")
            if text or tail:
                # Text and tail are concatenated explicitly; relying on the
                # precedence of chained conditional expressions here would
                # silently drop the tail whenever the text is non-empty.
                pieces.append(text + tail)
        return "<b/>".join(pieces)

    def reel(self, word):
        """Return a list of ``WordPair(word, match)`` for *word*.

        An empty list is returned when the language of *word* is neither
        ``sLang`` nor ``tLang``, or when its surface contains ``"<"``
        (multiword entries are skipped). Otherwise the matches are whatever
        ``getFix(word.surface)`` yields on the tree for the word's language;
        the source-side tree wins if both languages are equal.
        """
        if word.lang not in (self.sLang, self.tLang):
            return []
        if "<" in word.surface:
            return []  # Skip multiwords
        tree = self.sTree if self.sLang == word.lang else self.tTree
        return [WordPair(word, match) for match in tree.getFix(word.surface)]

    def __str__(self):
        """Return ``"<sLang>|<tLang>"``."""
        return self.sLang + "|" + self.tLang