-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransliterator.py
154 lines (134 loc) · 5.19 KB
/
transliterator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import json
from token import EHConsonant, EHVowel,HRConsonant,HRVowel
from functools import reduce
class EHParser:
    """
    Parser that splits an input string into EHConsonant / EHVowel tokens
    using the key sets in data/consonants.json and data/vowels.json, and
    joins the string form of those tokens into the transliterated output.

    :maximalMap: is the maximum number of input characters that are tried
    as a single mapping key (default 3).
    """

    # Mapping tables are loaded lazily on the first genToken() call and then
    # reused by the instance, instead of re-reading both JSON files for
    # every single token. Class-level None defaults keep the lazy load
    # working even for instances created without running __init__.
    _consonantDict = None
    _vowelDict = None

    def __init__(self, maximalMap=3):
        self.maximalMap = maximalMap

    def _loadMaps(self):
        """
        Read the consonant and vowel mapping files if not loaded yet.

        The paths are relative to the current working directory, exactly
        as in the original code. The files are decoded as UTF-8 explicitly
        so the result does not depend on the platform's default encoding.
        """
        if self._consonantDict is None:
            with open('data/consonants.json', encoding='utf-8') as json_file:
                self._consonantDict = json.load(json_file)
        if self._vowelDict is None:
            with open('data/vowels.json', encoding='utf-8') as json_file:
                self._vowelDict = json.load(json_file)

    def vowelToggle(self, tokenList):
        """
        Calls toggle() on every EHVowel in :tokenList: that is either the
        first token or directly preceded by another EHVowel. All other
        tokens are left untouched.

        Per the original documentation, a toggled vowel prints as a full
        vowel and an untoggled one as a matra (toggle() itself lives in
        the token module -- confirm there).

        Mutates the tokens in place and returns the same list.
        """
        for i, tok in enumerate(tokenList):
            if not isinstance(tok, EHVowel):
                continue
            if i == 0 or isinstance(tokenList[i - 1], EHVowel):
                tok.toggle()
        return tokenList

    def consonantToggle(self, tokenList):
        """
        Calls toggle() on every EHConsonant in :tokenList: that is either
        the last token or directly followed by another EHConsonant. All
        other tokens are left untouched.

        Per the original documentation, a toggled consonant prints as a
        half character and an untoggled one as a full character (toggle()
        itself lives in the token module -- confirm there).

        Mutates the tokens in place and returns the same list.
        """
        last = len(tokenList) - 1
        for i, tok in enumerate(tokenList):
            if not isinstance(tok, EHConsonant):
                continue
            if i == last or isinstance(tokenList[i + 1], EHConsonant):
                tok.toggle()
        return tokenList

    def genToken(self, inputStr):
        """
        Generate maximal character from mapping jsons.

        Tries the prefixes of :inputStr: from longest to shortest and
        returns an EHConsonant or EHVowel built from the first prefix that
        is a key of the matching table. The consonant table is checked
        before the vowel table. Returns None when no prefix is a known key.
        """
        self._loadMaps()
        for end in range(len(inputStr), 0, -1):
            key = inputStr[:end]
            if key in self._consonantDict:
                return EHConsonant(key)
            if key in self._vowelDict:
                return EHVowel(key)
        return None

    def genTokenList(self, inputStr):
        """
        Returns the generated token list after parsing :inputStr:

        Raises ValueError when some position of :inputStr: matches no
        mapping key (previously this surfaced as an AttributeError on
        None).
        """
        index = 0
        tokenList = []
        while index < len(inputStr):
            t = self.genToken(inputStr[index:index + self.maximalMap])
            if t is None:
                raise ValueError(
                    "cannot transliterate {!r}: no mapping found at "
                    "position {}".format(inputStr, index))
            tokenList.append(t)
            # Advance by the length of what the token consumed.
            # NOTE(review): assumes getChar() returns the matched input
            # key -- confirm against the token module.
            index += len(t.getChar())
        return tokenList

    def parse(self, inputStr):
        """
        Parses the inputStr and returns a unicode string of the
        transliterated string.

        Consonants are toggled first, then vowels. The result is always a
        str: an empty input gives "" and a single-token input gives that
        token's string form rather than the token object.
        """
        tokenList = self.genTokenList(inputStr)
        tokenList = self.consonantToggle(tokenList)
        tokenList = self.vowelToggle(tokenList)
        return "".join(str(tok) for tok in tokenList)
class HRParser:
    """
    Parser that splits an input string into HRConsonant / HRVowel tokens
    using the key sets in data/romanconsonants.json, data/romanvowels.json
    and data/romanmatras.json, and joins the string form of those tokens
    into the transliterated output.

    :maximalMap: is the maximum number of input characters that are tried
    as a single mapping key (default 1).
    """

    # Mapping tables are loaded lazily on the first genToken() call and then
    # reused by the instance, instead of re-reading all three JSON files for
    # every single token. Class-level None defaults keep the lazy load
    # working even for instances created without running __init__.
    _HRconsonantDict = None
    _HRvowelDict = None
    _HRmatraDict = None

    def __init__(self, maximalMap=1):
        self.maximalMap = maximalMap

    def _loadMaps(self):
        """
        Read the consonant, vowel and matra mapping files if not loaded yet.

        The paths are relative to the current working directory, exactly
        as in the original code. The files are decoded as UTF-8 explicitly
        so the result does not depend on the platform's default encoding.
        """
        if self._HRconsonantDict is None:
            with open('data/romanconsonants.json', encoding='utf-8') as json_file:
                self._HRconsonantDict = json.load(json_file)
        if self._HRvowelDict is None:
            with open('data/romanvowels.json', encoding='utf-8') as json_file:
                self._HRvowelDict = json.load(json_file)
        if self._HRmatraDict is None:
            with open('data/romanmatras.json', encoding='utf-8') as json_file:
                self._HRmatraDict = json.load(json_file)

    def consonantToggle(self, tokenList):
        """
        Calls toggle() on every HRConsonant in :tokenList: that is either
        the last token or directly followed by another HRConsonant. All
        other tokens are left untouched.

        Per the original documentation, a toggled consonant prints as a
        half character and an untoggled one as a full character (toggle()
        itself lives in the token module -- confirm there).

        Mutates the tokens in place and returns the same list.
        """
        last = len(tokenList) - 1
        for i, tok in enumerate(tokenList):
            if not isinstance(tok, HRConsonant):
                continue
            if i == last or isinstance(tokenList[i + 1], HRConsonant):
                tok.toggle()
        return tokenList

    def genToken(self, inputStr):
        """
        Generate maximal character from mapping jsons.

        Tries the prefixes of :inputStr: from longest to shortest and
        returns a token built from the first prefix that is a known key.
        The tables are checked in the order consonants, vowels, matras;
        vowel and matra keys both produce an HRVowel. Returns None when
        no prefix is a known key.
        """
        self._loadMaps()
        for end in range(len(inputStr), 0, -1):
            key = inputStr[:end]
            if key in self._HRconsonantDict:
                return HRConsonant(key)
            if key in self._HRvowelDict or key in self._HRmatraDict:
                return HRVowel(key)
        return None

    def genTokenList(self, inputStr):
        """
        Returns the generated token list after parsing :inputStr:

        Raises ValueError when some position of :inputStr: matches no
        mapping key (previously this surfaced as an AttributeError on
        None).
        """
        index = 0
        tokenList = []
        while index < len(inputStr):
            t = self.genToken(inputStr[index:index + self.maximalMap])
            if t is None:
                raise ValueError(
                    "cannot transliterate {!r}: no mapping found at "
                    "position {}".format(inputStr, index))
            tokenList.append(t)
            # Advance by the length of what the token consumed.
            # NOTE(review): assumes getChar() returns the matched input
            # key -- confirm against the token module.
            index += len(t.getChar())
        return tokenList

    def parse(self, inputStr):
        """
        Parses the inputStr and returns a unicode string of the
        transliterated string.

        Only consonants are toggled for this parser. The result is always
        a str: an empty input gives "" and a single-token input gives that
        token's string form rather than the token object.
        """
        tokenList = self.consonantToggle(self.genTokenList(inputStr))
        return "".join(str(tok) for tok in tokenList)
if __name__ == "__main__":
    # Command-line entry point: expects exactly two arguments, a mode
    # ("Hindi" or "Roman") followed by the word to transliterate. Any
    # other invocation prints the usage line.
    import sys

    usage = "Usage: python3 transliterator.py <Roman|Hindi> <word>"
    # Mode name -> parser class handling that mode.
    parsers_by_mode = {"Hindi": EHParser, "Roman": HRParser}

    cli_args = sys.argv[1:]
    if len(cli_args) == 2 and cli_args[0] in parsers_by_mode:
        mode, word = cli_args
        parser = parsers_by_mode[mode]()
        print("The transliteration of", word, "is", parser.parse(word))
    else:
        print(usage)