-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathg2p.foma
126 lines (104 loc) · 3.65 KB
/
g2p.foma
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Copyright (C) 2023 Darko Milosevic <daremc86@gmail.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3.0 of the License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# CyrAlphabet defines all letters including apostrophe for schwa and vocalic R
# CyrA also defines a stress mark for forcing stress
source cyrillic_alphabet.foma
# Word: a non-empty sequence made up of letters and/or stress marks.
# CLetter and StressMark are not defined in this file; presumably they are
# provided by cyrillic_alphabet.foma (sourced above) -- confirm there.
define Word [ StressMark | CLetter ]+ ;
# InitialLetterMapping: context-free letter -> phoneme table.
# Every entry pairs one input letter with one phoneme symbol; the entries are
# combined by union, so their order carries no meaning.
# All vowels are emitted in their unstressed form (suffix 0).
define InitialLetterMapping
    # --- vowels ---
    а : a0   |
    е : e0   |   # accent is only for disambiguation. Could be removed when capitalized
    и : ii0  |   # accent is only for disambiguation. Could be removed when capitalized
    о : o0   |
    у : u0   |
    î : i0   |   # debug schwa
    ë : ax0  |
    # --- consonants ---
    б : b    |
    в : v    |
    г : g    |
    д : d    |
    ђ : dj   |
    ж : zh   |
    з : z    |
    ѕ : dz   |
    ј : j    |
    к : k    |
    л : l    |
    љ : lj   |
    м : m    |
    н : n    |
    њ : nj   |
    п : p    |
    р : r    |
    с : s    |
    т : t    |
    ћ : tj   |
    ф : f    |
    х : h    |
    ц : ts   |
    ч : tsh  |
    џ : dzh  |
    ш : sh   ;
# InitialTranscription: turns InitialLetterMapping into a rewrite rule using
# the @-> operator, with no context restriction, so the letter -> phoneme
# pairs are applied throughout the input string.
define InitialTranscription InitialLetterMapping @-> ;
source stress_in_text.foma
# Stress-related symbol classes.
# Vowel phonemes end in 0 when unstressed and in 1 when stressed.
# Note that an 'r' phoneme between two consonants can be treated as a
# semi-vowel to form a syllable, e.g. / ts r k v a /.
# When moving to Macedonian phonemes, let's try a semi-vowel, "ry".

# tgStress: pairs every unstressed vowel with its stressed counterpart.
define tgStress a0:a1|e0:e1|ii0:ii1|i0:i1|o0:o1|u0:u1|ry0:ry1|ax0:ax1;
# noStressV: the unstressed vowels (the input side of tgStress).
define noStressV a0 | e0 | ii0 | o0 | u0 | i0 | ry0 | ax0 ;
# stressV: the stressed vowels (the output side of tgStress).
# ry1 must be listed here: without it a stressed ry1 would fall into pc and
# noStress below, and the stress rules would treat a word that already has a
# stressed ry1 as unstressed.
define stressV a1 | e1 | ii1 | o1 | u1 | i1 | ry1 | ax1 ;
# vowel: any vowel, stressed or not.
define vowel noStressV | stressV;
# pc: any single symbol that is not a vowel (this includes StressMark).
define pc \vowel;
# pcx: pc without StressMark.
define pcx pc - StressMark;
# noStress: any symbol that is not a stressed vowel.
define noStress noStressV | pc | StressMark;
# vowelizeR: two rewrites applied in parallel (joined by ",,"):
#   1. r -> i0 r   when the r is preceded by a pc symbol (non-vowel) and
#                  followed by a pcx symbol (non-vowel other than StressMark),
#                  i.e. an i0 is inserted before an r standing between
#                  non-vowels;
#   2. ax0 -> i0   when directly followed by r.
define vowelizeR r -> [i0 r] || pc _ pcx ,,
ax0 -> i0 || _ r ;
# markedStressRule: applies tgStress to the vowel that immediately follows a
# StressMark. The StressMark is part of the matched sequence and is kept in
# the output; StressRules removes it in a separate step ([StressMark -> 0]).
define markedStressRule StressMark tgStress -> || _ ; # convert any unstressed vowel preceded by a stress mark
# stressedSuffixes: word endings with a fixed stress position. Each entry
# pairs the unstressed phoneme sequence of an ending with the same sequence
# carrying stress on one vowel. The Cyrillic spelling is given in the comment.
# The letter и is transcribed as ii0 by InitialLetterMapping, so every и in
# these endings must be written ii0 (i0 is the schwa symbol).
define stressedSuffixes # these should really go into stress dictionary
[ii0 r a0 m] : [ii1 r a0 m] | # ирам
[ii0 r a0 sh] : [ii1 r a0 sh] | # ираш
[ii0 r a0] : [ii1 r a0] | # ира
[ii0 r a0 l] : [ii1 r a0 l] | # ирал
[ii0 r a0 j] : [ii1 r a0 j] | # ирај
[ii0 r a0 o0] : [ii1 r a0 o0] | # ирао
[l o0 g] : [l o1 g] | # лог
[f o0 n] : [f o1 n] | # фон
[f o0 n ii0] : [f o1 n ii0] ; # фони (final и is ii0, not the schwa i0)
# stressSuffixRule: applies stressedSuffixes to an ending that reaches the end
# of the word (right context .#.), provided everything between the start of
# the word and the ending is unstressed (noStress*).
# Its line in StressRules is commented out, so it is not part of that cascade.
define stressSuffixRule stressedSuffixes -> || .#. noStress* _ .#.;
# antePenultStressRule: stresses (via tgStress) the third vowel from the end
# of the word. Left context: nothing before the vowel is stressed (noStress*
# back to the word start). Right context: exactly two further unstressed
# vowels, separated and surrounded only by pc symbols, up to the word end.
define antePenultStressRule tgStress -> || .#. noStress* _ pc* noStressV pc* noStressV pc* .#.;
# frontStressRule: stresses (via tgStress) the first vowel of the word (only
# pc symbols lie between the word start and the vowel), but only when nothing
# after it is stressed (noStress* up to the word end). In StressRules it is
# composed after antePenultStressRule, so it only affects words that are
# still entirely unstressed at that point.
define frontStressRule tgStress -> || .#. pc* _ noStress* .#.;
# StressRules: the stress-assignment cascade. The steps are composed in the
# order listed, each one consuming the output of the previous step.
define StressRules
    markedStressRule      .o.   # 1. stress the vowel after an explicit stress mark
    [ StressMark -> 0 ]   .o.   # 2. delete the stress marks themselves
    # (stressSuffixRule used to be composed here; moved to stress dictionary)
    antePenultStressRule  .o.   # 3. stress the third vowel from the end
    frontStressRule ;           # 4. otherwise stress the first vowel
source latin_to_cyrillic.foma
source unstressed_dictionary.foma
source downcase.foma
source stress_dictionary.foma
# TranscriptionRules: letter-level transcription without stress assignment.
# The input is restricted to Word, letters are mapped to phonemes by
# InitialTranscription, and vowelizeR then adjusts r / schwa sequences.
define TranscriptionRules
    Word
    .o. InitialTranscription
    .o. vowelizeR
;
# To be written
# In order to handle words which are in the stress dictionary which might be in uppercase and/or latin
# we need to take the Downcase .o. LatinConvert out of TranscriptionRules
# and run them before the .P. below.
source abbrevs.foma
# Rules: the complete grapheme-to-phoneme relation.
# AbbrevTranslation, Downcase, LatinConvert, UnstressedWords and
# ApplyStressDictionary are not defined in this file; presumably they come
# from the files sourced above -- confirm there.
#
# Inner bracket: the UnstressedWords path (transcription only, no StressRules)
# takes priority (.P.) over the path that applies the stress dictionary,
# transcribes, and then runs StressRules.
#
# NOTE(review): in foma's documented operator precedence .P. binds more
# tightly than .o., so the first two lines appear to group as
#   [[AbbrevTranslation] .P. [Downcase .o. LatinConvert]] .o. [...]
# i.e. the output of AbbrevTranslation is also fed through the bracketed
# transcription stage. Confirm this is the intended grouping.
define Rules
[AbbrevTranslation] .P.
[Downcase .o. LatinConvert] .o.
[[UnstressedWords .o. TranscriptionRules] .P.
[ApplyStressDictionary .o. TranscriptionRules .o. StressRules]]
;
# Compile Rules and leave the resulting network on top of the foma stack.
regex Rules ;