#119 add bgnpcgn-tuk-cyrl-latn-1979 and bgnpcgn-tuk-cyrl-latn-1993

interscript · Jun 17, 2024 · 332a2d0 · 332a2d0
1 parent c0efd35
commit 332a2d0
Show file tree

Hide file tree

Showing 2 changed files with 237 additions and 0 deletions.
diff --git a/maps/bgnpcgn-tuk-Cyrl-Latn-1979.imp b/maps/bgnpcgn-tuk-Cyrl-Latn-1979.imp
@@ -0,0 +1,129 @@
+metadata {
+  authority_id: bgnpcgn
+  id: 1979
+  language: iso-639-2:tuk # Turkmen
+  source_script: Cyrl # input is Cyrillic text
+  destination_script: Latn # output is Latin text
+  name: TURKMEN Cyrillic script
+  url: https://transliteration.eki.ee/pdf/Turkmen.pdf
+  creation_date: 1979
+
+  notes: # the first entry is the rule that the stage's "note[1]" comments refer to
+    - Ye/ye is used initially, after a vowel, й, ъ, and ь.
+}
+
+tests {
+  # Place-name examples adapted from https://assets.publishing.service.gov.uk/media/6329b1638fa8f53cb45763b4/TABLE_OF_CORRESPONDENCES_FOR_TURKMEN_with_examples.pdf
+  test "Ашгабат", "Ashgabat" # input starts with Cyrillic А (U+0410) so the А -> A rule is exercised
+  test "Лебап", "Lebap"
+  test "Ныязов", "Nyyazov"
+  test "Тагта", "Tagta"
+  test "Дашховуз", "Dashkhovuz"
+  test "Небитдаг", "Nebitdag"
+  test "Ёлөтен", "Yolöten"
+  test "Теҗен", "Tejen"
+  test "Газанҗык", "Gazanjyk"
+  test "Керки", "Kerki"
+  test "Бүзмейин", "Büzmeyin"
+  test "Кака", "Kaka"
+  test "Челекен", "Cheleken"
+  test "Түркменбашы", "Türkmenbashy"
+  test "Чаршаңңы", "Charshangngy"
+  test "Мургап", "Murgap"
+  test "Мары", "Mary"
+  test "Сарахс", "Sarakhs"
+  test "Фарап", "Farap"
+  test "Эсенгулы", "Esenguly"
+  test "Әнев", "Änev"
+}
+
+stage {
+  # RULES: context-sensitive substitutions, listed ahead of the plain character table.
+
+  # note[1]: Е/е is written Ye/ye word-initially and after a vowel, й, ъ or ь (anywhere in the word).
+  sub boundary + "\u0415", "Ye"
+  sub boundary + "\u0435", "ye"
+  sub "\u0415", "Ye", before: any("АаЕеЁёИиЙйОоӨөУуҮүЪъЫыЬьЭэӘәЮюЯя")
+  sub "\u0435", "ye", before: any("АаЕеЁёИиЙйОоӨөУуҮүЪъЫыЬьЭэӘәЮюЯя")
+
+  # CHARACTERS: one-to-one table, capitals first, then lowercase.
+  parallel {
+    sub "\u0410", "A" # А
+    sub "\u0411", "B" # Б
+    sub "\u0412", "V" # В
+    sub "\u0413", "G" # Г
+    sub "\u0414", "D" # Д
+    sub "\u0415", "E" # Е note[1]
+    sub "\u0401", "Yo" # Ё
+    sub "\u0416", "Zh" # Ж
+    sub "\u0496", "J" # Җ
+    sub "\u0417", "Z" # З
+    sub "\u0418", "I" # И
+    sub "\u0419", "Y" # Й
+    sub "\u041A", "K" # К
+    sub "\u041B", "L" # Л
+    sub "\u041C", "M" # М
+    sub "\u041D", "N" # Н
+    sub "\u04A2", "Ng" # Ң
+    sub "\u041E", "O" # О
+    sub "\u04E8", "Ö" # Ө
+    sub "\u041F", "P" # П
+    sub "\u0420", "R" # Р
+    sub "\u0421", "S" # С
+    sub "\u0422", "T" # Т
+    sub "\u0423", "U" # У
+    sub "\u04AE", "Ü" # Ү
+    sub "\u0424", "F" # Ф
+    sub "\u0425", "Kh" # Х
+    sub "\u0426", "Ts" # Ц
+    sub "\u0427", "Ch" # Ч
+    sub "\u0428", "Sh" # Ш
+    sub "\u0429", "Shch" # Щ
+    sub "\u042A", "\"" # Ъ
+    sub "\u042B", "Y" # Ы
+    sub "\u042C", "'" # Ь
+    sub "\u042D", "E" # Э
+    sub "\u04D8", "Ä" # Ә
+    sub "\u042E", "Yu" # Ю
+    sub "\u042F", "Ya" # Я
+
+    sub "\u0430", "a" # а
+    sub "\u0431", "b" # б
+    sub "\u0432", "v" # в
+    sub "\u0433", "g" # г
+    sub "\u0434", "d" # д
+    sub "\u0435", "e" # е note[1]
+    sub "\u0451", "yo" # ё
+    sub "\u0436", "zh" # ж
+    sub "\u0497", "j" # җ
+    sub "\u0437", "z" # з
+    sub "\u0438", "i" # и
+    sub "\u0439", "y" # й
+    sub "\u043A", "k" # к
+    sub "\u043B", "l" # л
+    sub "\u043C", "m" # м
+    sub "\u043D", "n" # н
+    sub "\u04A3", "ng" # ң
+    sub "\u043E", "o" # о
+    sub "\u04E9", "ö" # ө
+    sub "\u043F", "p" # п
+    sub "\u0440", "r" # р
+    sub "\u0441", "s" # с
+    sub "\u0442", "t" # т
+    sub "\u0443", "u" # у
+    sub "\u04af", "ü" # ү
+    sub "\u0444", "f" # ф
+    sub "\u0445", "kh" # х
+    sub "\u0446", "ts" # ц
+    sub "\u0447", "ch" # ч
+    sub "\u0448", "sh" # ш
+    sub "\u0449", "shch" # щ
+    sub "\u044A", "\"" # ъ
+    sub "\u044B", "y" # ы
+    sub "\u044C", "'" # ь
+    sub "\u044D", "e" # э
+    sub "\u04D9", "ä" # ә
+    sub "\u044E", "yu" # ю
+    sub "\u044F", "ya" # я
+  }
+}
diff --git a/maps/bgnpcgn-tuk-Cyrl-Latn-1993.imp b/maps/bgnpcgn-tuk-Cyrl-Latn-1993.imp
@@ -0,0 +1,108 @@
+metadata {
+  authority_id: bgnpcgn
+  id: 1993
+  language: iso-639-2:tuk
+  source_script: Cyrl
+  destination_script: Latn
+  name: Table of Correspondences for Turkmen (Cyrillic-Roman) (2000 Agreement)
+  url: https://assets.publishing.service.gov.uk/media/6329b1638fa8f53cb45763b4/TABLE_OF_CORRESPONDENCES_FOR_TURKMEN_with_examples.pdf
+  creation_date: 1993
+  confirmation_date: 2022
+  description: |
+    In 1993, the Turkmen government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet.  When Turkmen Roman-alphabet spellings are not available, this table can be used to convert Turkmen Cyrillic spellings.  This table of correspondences supersedes the BGN/PCGN 1979 romanization system for Turkmen.
+
+  notes: # list position is the note number; the stage's "note[1]" comment refers to the first entry
+    - The letter sequence ýe is used initially, after the vowel characters 33, 35, 37, and 38, and after character 12.
+    - Rarely encountered.
+    - | # NOTE(review): the glyph Ұ is U+04B0, not U+04BE as written below; confirm letter and code point against the source PDF
+      If the letter Ұ (Unicode 04BE) is encountered, it should be rendered Ý.
+      If the letter ÿ (Unicode 00FF) is encountered, it should be rendered ý.
+    - |
+      If the character £ (Unicode 00A3) is encountered, it should be rendered Ž.
+      If the character ſ (Unicode 017F) is encountered, it should be rendered ž.
+    - If the letter-diacritic combination ñ (Unicode 00F1) is encountered, it should be rendered ň.
+    - |
+      An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
+        Ý (U+00DD) ý (U+00FD)
+        Ž (U+017D) ž (U+017E)
+        Ü (U+00DC) ü (U+00FC)
+        Ç (U+00C7) ç (U+00E7)
+        Ş (U+015E) ş (U+015F)
+        Ň (U+0147) ň (U+0148)
+        Ö (U+00D6) ö (U+00F6)
+        Ä (U+00C4) ä (U+00E4)
+    - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
+}
+
+tests { # Cyrillic place names paired with their expected 1993 Roman-alphabet spellings
+  test "Ашгабат", "Aşgabat" # input starts with Cyrillic А (U+0410) so the А -> A rule is exercised
+  test "Лебап", "Lebap"
+  test "Ныязов", "Nyýazow"
+  test "Тагта", "Tagta"
+  test "Дашховуз", "Daşhowuz"
+  test "Небитдаг", "Nebitdag"
+  test "Ёлөтен", "Ýolöten"
+  test "Теҗен", "Tejen"
+  test "Газанҗык", "Gazanjyk"
+  test "Керки", "Kerki"
+  test "Бүзмейин", "Büzmeýin"
+  test "Кака", "Kaka"
+  test "Челекен", "Çeleken"
+  test "Түркменбашы", "Türkmenbaşy"
+  test "Чаршаңңы", "Çarşaňňy"
+  test "Мургап", "Murgap"
+  test "Мары", "Mary"
+  test "Сарахс", "Sarahs"
+  test "Фарап", "Farap"
+  test "Эсенгулы", "Esenguly"
+  test "Әнев", "Änew"
+}
+
+dependency "bgnpcgn-tuk-Cyrl-Latn-1979", as: cyrllatn
+
+stage {
+  # RULES: context-sensitive substitutions, listed ahead of the character overrides.
+
+  # note[1]: Е/е is written Ýe/ýe word-initially and after a vowel or й (anywhere in the word).
+  sub boundary + "\u0415", "Ýe"
+  sub boundary + "\u0435", "ýe"
+  sub "\u0415", "Ýe", before: any("АаЕеЁёИиЙйОоӨөУуҮүЫыЭэӘәЮюЯя")
+  sub "\u0435", "ýe", before: any("АаЕеЁёИиЙйОоӨөУуҮүЫыЭэӘәЮюЯя")
+
+  # CHARACTERS: letters listed here are converted in this stage; the rest is left to the 1979 stage run below.
+  parallel {
+    sub "\u0412", "W"  # В
+    sub "\u0401", "Ýo" # Ё
+    sub "\u0416", "Ž"  # Ж
+    sub "\u0419", "Ý"  # Й
+    sub "\u04A2", "Ň"  # Ң
+    sub "\u0425", "H"  # Х
+    sub "\u0426", "S"  # Ц
+    sub "\u0427", "Ç"  # Ч
+    sub "\u0428", "Ş"  # Ш
+    sub "\u0429", "Şç" # Щ (title case, like the other capital digraphs Ýo/Ýu/Ýa)
+    sub "\u042A", ""   # Ъ
+    sub "\u042C", ""   # Ь
+    sub "\u042E", "Ýu" # Ю
+    sub "\u042F", "Ýa" # Я
+
+    sub "\u0432", "w"  # в
+    sub "\u0451", "ýo" # ё
+    sub "\u0436", "ž"  # ж
+    sub "\u0439", "ý"  # й
+    sub "\u04A3", "ň"  # ң
+    sub "\u04af", "ü"  # ү
+    sub "\u0444", "f"  # ф
+    sub "\u0445", "h"  # х
+    sub "\u0446", "s"  # ц
+    sub "\u0447", "ç"  # ч
+    sub "\u0448", "ş"  # ш
+    sub "\u0449", "şç" # щ
+    sub "\u044A", ""   # ъ
+    sub "\u044C", ""   # ь
+    sub "\u044E", "ýu" # ю
+    sub "\u044F", "ýa" # я
+  }
+
+  run map.cyrllatn.stage.main # then apply the main stage of the 1979 map declared as the cyrllatn dependency
+}