-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#119 add bgnpcgn-tuk-cyrl-latn-1979 and bgnpcgn-tuk-cyrl-latn-1993
- Loading branch information
Showing
2 changed files
with
237 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
metadata { | ||
# Identifies this map: authority "bgnpcgn", system id 1979, Turkmen (ISO 639-2 "tuk"), | ||
# converting from Cyrillic (Cyrl) to Latin (Latn). | ||
authority_id: bgnpcgn | ||
id: 1979 | ||
language: iso-639-2:tuk | ||
source_script: Cyrl | ||
destination_script: Latn | ||
name: TURKMEN Cyrillic script | ||
url: https://transliteration.eki.ee/pdf/Turkmen.pdf | ||
creation_date: 1979 | ||
|
||
# The stage rules tagged "note[1]" implement the first entry below. | ||
notes: | ||
- Ye/ye is used initially, after a vowel, й, ъ, and ь. | ||
} | ||
|
||
tests { | ||
# Each `test` pairs a Turkmen Cyrillic input with the Latin output expected from this map. | ||
# The place names are adopted from the examples in: | ||
# adopted https://assets.publishing.service.gov.uk/media/6329b1638fa8f53cb45763b4/TABLE_OF_CORRESPONDENCES_FOR_TURKMEN_with_examples.pdf | ||
# NOTE(review): the first letter of the "Aшгабат" input may be Latin "A" (U+0041) rather than | ||
# Cyrillic "А" (U+0410); a Latin letter would presumably pass through unchanged, so the test | ||
# cannot tell the two apart - confirm the intended character. | ||
test "Aшгабат", "Ashgabat" | ||
test "Лебап", "Lebap" | ||
test "Ныязов", "Nyyazov" | ||
test "Тагта", "Tagta" | ||
test "Дашховуз", "Dashkhovuz" | ||
test "Небитдаг", "Nebitdag" | ||
test "Ёлөтен", "Yolöten" | ||
test "Теҗен", "Tejen" | ||
test "Газанҗык", "Gazanjyk" | ||
test "Керки", "Kerki" | ||
test "Бүзмейин", "Büzmeyin" | ||
test "Кака", "Kaka" | ||
test "Челекен", "Cheleken" | ||
test "Түркменбашы", "Türkmenbashy" | ||
test "Чаршаңңы", "Charshangngy" | ||
test "Мургап", "Murgap" | ||
test "Мары", "Mary" | ||
test "Сарахс", "Sarakhs" | ||
test "Фарап", "Farap" | ||
test "Эсенгулы", "Esenguly" | ||
test "Әнев", "Änev" | ||
} | ||
|
||
stage { | ||
# RULES | ||
|
||
# note[1]: Е/е is romanized "Ye"/"ye" word-initially and after a vowel, й, ъ or ь. | ||
# These context rules run before the parallel block; any Е/е they do not match | ||
# falls through to the plain "E"/"e" entries below. | ||
# `before:` constrains the character preceding the match. No `after:` condition is used, | ||
# because the note places no restriction on what follows Е/е (a stray `after: boundary` | ||
# previously limited the post-vowel rule to word-final Е/е only). | ||
sub boundary + "\u0415", "Ye" | ||
sub boundary + "\u0435", "ye" | ||
sub "\u0415", "Ye", before: any("АаЕеЁёИиЙйОоӨөУуҮүЪъЫыЬьЭэӘәЮюЯя") | ||
sub "\u0435", "ye", before: any("АаЕеЁёИиЙйОоӨөУуҮүЪъЫыЬьЭэӘәЮюЯя") | ||
|
||
# CHARACTERS | ||
# Context-free letter correspondences: uppercase first, then lowercase. | ||
# Uppercase digraphs are title-case (e.g. "Zh", "Shch"). | ||
parallel { | ||
sub "\u0410", "A" # А | ||
sub "\u0411", "B" # Б | ||
sub "\u0412", "V" # В | ||
sub "\u0413", "G" # Г | ||
sub "\u0414", "D" # Д | ||
sub "\u0415", "E" # Е note[1] | ||
sub "\u0401", "Yo" # Ё | ||
sub "\u0416", "Zh" # Ж | ||
sub "\u0496", "J" # Җ | ||
sub "\u0417", "Z" # З | ||
sub "\u0418", "I" # И | ||
sub "\u0419", "Y" # Й | ||
sub "\u041A", "K" # К | ||
sub "\u041B", "L" # Л | ||
sub "\u041C", "M" # М | ||
sub "\u041D", "N" # Н | ||
sub "\u04A2", "Ng" # Ң | ||
sub "\u041E", "O" # О | ||
sub "\u04E8", "Ö" # Ө | ||
sub "\u041F", "P" # П | ||
sub "\u0420", "R" # Р | ||
sub "\u0421", "S" # С | ||
sub "\u0422", "T" # Т | ||
sub "\u0423", "U" # У | ||
sub "\u04AE", "Ü" # Ү | ||
sub "\u0424", "F" # Ф | ||
sub "\u0425", "Kh" # Х | ||
sub "\u0426", "Ts" # Ц | ||
sub "\u0427", "Ch" # Ч | ||
sub "\u0428", "Sh" # Ш | ||
sub "\u0429", "Shch" # Щ | ||
sub "\u042A", "\"" # Ъ | ||
sub "\u042B", "Y" # Ы | ||
sub "\u042C", "'" # Ь | ||
sub "\u042D", "E" # Э | ||
sub "\u04D8", "Ä" # Ә | ||
sub "\u042E", "Yu" # Ю | ||
sub "\u042F", "Ya" # Я | ||
|
||
sub "\u0430", "a" # а | ||
sub "\u0431", "b" # б | ||
sub "\u0432", "v" # в | ||
sub "\u0433", "g" # г | ||
sub "\u0434", "d" # д | ||
sub "\u0435", "e" # е note[1] | ||
sub "\u0451", "yo" # ё | ||
sub "\u0436", "zh" # ж | ||
sub "\u0497", "j" # җ | ||
sub "\u0437", "z" # з | ||
sub "\u0438", "i" # и | ||
sub "\u0439", "y" # й | ||
sub "\u043A", "k" # к | ||
sub "\u043B", "l" # л | ||
sub "\u043C", "m" # м | ||
sub "\u043D", "n" # н | ||
sub "\u04A3", "ng" # ң | ||
sub "\u043E", "o" # о | ||
sub "\u04E9", "ö" # ө | ||
sub "\u043F", "p" # п | ||
sub "\u0440", "r" # р | ||
sub "\u0441", "s" # с | ||
sub "\u0442", "t" # т | ||
sub "\u0443", "u" # у | ||
sub "\u04af", "ü" # ү | ||
sub "\u0444", "f" # ф | ||
sub "\u0445", "kh" # х | ||
sub "\u0446", "ts" # ц | ||
sub "\u0447", "ch" # ч | ||
sub "\u0448", "sh" # ш | ||
sub "\u0449", "shch" # щ | ||
sub "\u044A", "\"" # ъ | ||
sub "\u044B", "y" # ы | ||
sub "\u044C", "'" # ь | ||
sub "\u044D", "e" # э | ||
sub "\u04D9", "ä" # ә | ||
sub "\u044E", "yu" # ю | ||
sub "\u044F", "ya" # я | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
metadata { | ||
# Identifies this map: authority "bgnpcgn", system id 1993, Turkmen (ISO 639-2 "tuk"), | ||
# converting from Cyrillic (Cyrl) to Latin (Latn). | ||
authority_id: bgnpcgn | ||
id: 1993 | ||
language: iso-639-2:tuk | ||
source_script: Cyrl | ||
destination_script: Latn | ||
name: Table of Correspondences for Turkmen (Cyrillic-Roman) (2000 Agreement) | ||
url: https://assets.publishing.service.gov.uk/media/6329b1638fa8f53cb45763b4/TABLE_OF_CORRESPONDENCES_FOR_TURKMEN_with_examples.pdf | ||
creation_date: 1993 | ||
confirmation_date: 2022 | ||
description: | | ||
In 1993, the Turkmen government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Turkmen Roman-alphabet spellings are not available, this table can be used to convert Turkmen Cyrillic spellings. This table of correspondences supersedes the BGN/PCGN 1979 romanization system for Turkmen. | ||
|
||
notes: | ||
- The letter sequence ýe is used initially, after the vowel characters 33, 35, 37, and 38, and after character 12. | ||
- Rarely encountered. | ||
- | | ||
If the character ¥ (Unicode 00A5) is encountered, it should be rendered Ý. | ||
If the letter ÿ (Unicode 00FF) is encountered, it should be rendered ý. | ||
- | | ||
If the character £ (Unicode 00A3) is encountered, it should be rendered Ž. | ||
If the character ſ (Unicode 017F) is encountered, it should be rendered ž. | ||
- If the letter-diacritic combination ñ (Unicode 00F1) is encountered, it should be rendered ň. | ||
- | | ||
An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is: | ||
Ý (U+00DD) ý (U+00FD) | ||
Ž (U+017D) ž (U+017E) | ||
Ü (U+00DC) ü (U+00FC) | ||
Ç (U+00C7) ç (U+00E7) | ||
Ş (U+015E) ş (U+015F) | ||
Ň (U+0147) ň (U+0148) | ||
Ö (U+00D6) ö (U+00F6) | ||
Ä (U+00C4) ä (U+00E4) | ||
- The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used. | ||
} | ||
|
||
tests { | ||
# Each `test` pairs a Turkmen Cyrillic input with the Latin output expected from this map. | ||
# NOTE(review): the first letter of the "Aшгабат" input may be Latin "A" (U+0041) rather than | ||
# Cyrillic "А" (U+0410); a Latin letter would presumably pass through unchanged, so the test | ||
# cannot tell the two apart - confirm the intended character. | ||
test "Aшгабат", "Aşgabat" | ||
test "Лебап", "Lebap" | ||
test "Ныязов", "Nyýazow" | ||
test "Тагта", "Tagta" | ||
test "Дашховуз", "Daşhowuz" | ||
test "Небитдаг", "Nebitdag" | ||
test "Ёлөтен", "Ýolöten" | ||
test "Теҗен", "Tejen" | ||
test "Газанҗык", "Gazanjyk" | ||
test "Керки", "Kerki" | ||
test "Бүзмейин", "Büzmeýin" | ||
test "Кака", "Kaka" | ||
test "Челекен", "Çeleken" | ||
test "Түркменбашы", "Türkmenbaşy" | ||
test "Чаршаңңы", "Çarşaňňy" | ||
test "Мургап", "Murgap" | ||
test "Мары", "Mary" | ||
test "Сарахс", "Sarahs" | ||
test "Фарап", "Farap" | ||
test "Эсенгулы", "Esenguly" | ||
test "Әнев", "Änew" | ||
} | ||
|
||
dependency "bgnpcgn-tuk-Cyrl-Latn-1979", as: cyrllatn | ||
|
||
stage { | ||
# RULES | ||
# This stage applies only the correspondences that differ from the BGN/PCGN 1979 map, | ||
# then hands the remaining Cyrillic letters to that map's main stage (last line). | ||
|
||
# note[1]: Е/е is romanized "Ýe"/"ýe" word-initially and when preceded by one of the | ||
# listed characters. `before:` constrains the character preceding the match; no `after:` | ||
# condition is used (a stray `after: boundary` previously limited the rule to word-final Е/е). | ||
# NOTE(review): the metadata note names specific characters (33, 35, 37, 38 and 12) while | ||
# this set lists every vowel plus Й/й - confirm the set against the published table. | ||
sub boundary + "\u0415", "Ýe" | ||
sub boundary + "\u0435", "ýe" | ||
sub "\u0415", "Ýe", before: any("АаЕеЁёИиЙйОоӨөУуҮүЫыЭэӘәЮюЯя") | ||
sub "\u0435", "ýe", before: any("АаЕеЁёИиЙйОоӨөУуҮүЫыЭэӘәЮюЯя") | ||
|
||
# CHARACTERS | ||
# Overrides of the 1979 correspondences. Uppercase digraphs are title-case | ||
# ("Ýo", "Şç", "Ýu", "Ýa"); Ъ/ъ and Ь/ь are dropped. | ||
parallel { | ||
sub "\u0412", "W" # В | ||
sub "\u0401", "Ýo" # Ё | ||
sub "\u0416", "Ž" # Ж | ||
sub "\u0419", "Ý" # Й | ||
sub "\u04A2", "Ň" # Ң | ||
sub "\u0425", "H" # Х | ||
sub "\u0426", "S" # Ц | ||
sub "\u0427", "Ç" # Ч | ||
sub "\u0428", "Ş" # Ш | ||
sub "\u0429", "Şç" # Щ | ||
sub "\u042A", "" # Ъ | ||
sub "\u042C", "" # Ь | ||
sub "\u042E", "Ýu" # Ю | ||
sub "\u042F", "Ýa" # Я | ||
|
||
sub "\u0432", "w" # в | ||
sub "\u0451", "ýo" # ё | ||
sub "\u0436", "ž" # ж | ||
sub "\u0439", "ý" # й | ||
sub "\u04A3", "ň" # ң | ||
sub "\u04af", "ü" # ү | ||
sub "\u0444", "f" # ф | ||
sub "\u0445", "h" # х | ||
sub "\u0446", "s" # ц | ||
sub "\u0447", "ç" # ч | ||
sub "\u0448", "ş" # ш | ||
sub "\u0449", "şç" # щ | ||
sub "\u044A", "" # ъ | ||
sub "\u044C", "" # ь | ||
sub "\u044E", "ýu" # ю | ||
sub "\u044F", "ýa" # я | ||
} | ||
|
||
# Everything not handled above is romanized by the dependency declared as `cyrllatn`. | ||
run map.cyrllatn.stage.main | ||
} |