From e383eeed289ca83402f7c0ffa62a8059c8643375 Mon Sep 17 00:00:00 2001 From: btangmu Date: Wed, 8 May 2024 13:09:29 -0400 Subject: [PATCH 1/4] CLDR-17615 Update languages (v46 SBRS), work in progress -So far I did the steps before this one: Run the tool CompareIso3166_1Status --- .../cldr/util/data/iso-639-3-version.tab | 2 +- .../org/unicode/cldr/util/data/iso-639-3.tab | 3 +- .../cldr/util/data/iso-639-3_Name_Index.tab | 3 +- .../cldr/util/data/language-subtag-registry | 78 ++++++++++++++++++- .../cldr/util/data/tlds-alpha-by-domain.txt | 4 +- 5 files changed, 83 insertions(+), 7 deletions(-) diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3-version.tab b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3-version.tab index fb896437271..bc62afe9d00 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3-version.tab +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3-version.tab @@ -1 +1 @@ -20240207 +20240415 diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3.tab b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3.tab index 68c7f608b46..1cb94ac1f56 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3.tab +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3.tab @@ -1553,7 +1553,7 @@ dgk I L Dagba dgl I L Andaandi dgn I E Dagoman dgo I L Dogri (individual language) -dgr dgr dgr I L Dogrib +dgr dgr dgr I L Tlicho dgs I L Dogoso dgt I E Ndra'ngith dgw I E Daungwurrung @@ -2662,6 +2662,7 @@ iso I L Isoko isr I L Israeli Sign Language ist I L Istriot isu I L Isu (Menchum Division) +isv I C Interslavic ita ita ita it I L Italian itb I L Binongan Itneg itd I L Southern Tidung diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3_Name_Index.tab b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3_Name_Index.tab index 8f038e755b8..6b49a0bd316 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3_Name_Index.tab +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/iso-639-3_Name_Index.tab @@ -1616,7 +1616,7 @@ dgl Dongolawi Dongolawi dgn Dagoman Dagoman dgo Dogri (individual language) Dogri (individual language) dgr Dogrib Dogrib -dgr Tłı̨chǫ Tłı̨chǫ +dgr Tlicho Tlicho dgs Dogoso Dogoso dgt Ndra'ngith Ndra'ngith dgw Daungwurrung Daungwurrung @@ -2800,6 +2800,7 @@ iso Isoko Isoko isr Israeli Sign Language Israeli Sign Language ist Istriot Istriot isu Isu (Menchum Division) Isu (Menchum Division) +isv Interslavic Interslavic ita Italian Italian itb Binongan Itneg Itneg, Binongan itd Southern Tidung Tidung, Southern diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language-subtag-registry b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language-subtag-registry index c6937ee80f1..4737c50e425 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language-subtag-registry +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language-subtag-registry @@ -1,4 +1,4 @@ -File-Date: 2023-10-16 +File-Date: 2024-03-07 %% Type: language Subtag: aa @@ -882,6 +882,7 @@ Type: language Subtag: sa Description: Sanskrit Added: 2005-10-16 +Scope: macrolanguage %% Type: language Subtag: sc @@ -8028,6 +8029,12 @@ Description: Lowland Oaxaca Chontal Added: 2009-07-29 %% Type: language +Subtag: cls +Description: Classical Sanskrit +Added: 2024-03-04 +Macrolanguage: sa +%% +Type: language Subtag: clt Description: Lautu Chin Added: 2012-08-12 @@ -30916,6 +30923,11 @@ Description: Ririo Added: 2009-07-29 %% Type: language +Subtag: rrm +Description: Moriori +Added: 2024-03-04 +%% +Type: language Subtag: rro Description: Waima Added: 2009-07-29 @@ -37660,6 +37672,12 @@ Description: Venezuelan Sign Language Added: 2009-07-29 %% Type: language +Subtag: vsn +Description: Vedic Sanskrit +Added: 2024-03-04 +Macrolanguage: sa +%% +Type: language Subtag: vsv Description: Valencian Sign Language Description: Llengua de signes valenciana @@ -47559,6 +47577,13 @@ Comments: Aluku dialect of the "Busi Nenge Tongo" English-based Creole continuum in Eastern Suriname and Western French Guiana %% Type: variant +Subtag: anpezo +Description: Anpezo standard of Ladin +Added: 2024-03-04 +Prefix: lld +Comments: Represents the standard written form of Ladin in Anpezo +%% +Type: variant Subtag: ao1990 Description: Portuguese Language Orthographic Agreement of 1990 (Acordo Ortográfico da Língua Portuguesa de 1990) @@ -47779,6 +47804,22 @@ Added: 2012-02-05 Prefix: en %% Type: variant +Subtag: fascia +Description: Fascia standard of Ladin +Added: 2024-03-04 +Prefix: lld +Comments: Represents the standard written form of Ladin in Fascia which + unified the three subvarieties Cazet, Brach and Moenat +%% +Type: variant +Subtag: fodom +Description: Fodom standard of Ladin +Added: 2024-03-04 +Prefix: lld +Comments: Represents the standard written form of Ladin in Livinallongo + and Colle Santa Lucia +%% +Type: variant Subtag: fonipa Description: International Phonetic Alphabet Added: 2006-12-11 @@ -47819,6 +47860,13 @@ Prefix: oc Comments: Occitan variant spoken in Gascony %% Type: variant +Subtag: gherd +Description: Gherdëina standard of Ladin +Added: 2024-03-04 +Prefix: lld +Comments: Represents the standard written form of Ladin in Gherdëina +%% +Type: variant Subtag: grclass Description: Classical Occitan orthography Added: 2018-04-22 @@ -48120,6 +48168,15 @@ Comments: Peano’s Interlingua, created in 1903 by Giuseppe Peano as an Added: 2020-03-12 %% Type: variant +Subtag: pehoeji +Description: Hokkien Vernacular Romanization System +Description: Pe̍h-ōe-jī orthography/romanization +Added: 2024-03-04 +Prefix: nan-Latn +Comments: Modern Hokkien Vernacular Romanization System, evolved from + the New Dictionary in the Amoy by John Van Nest Talmage in 1894 +%% +Type: variant Subtag: petr1708 Description: Petrine orthography Added: 2010-10-10 @@ -48254,6 +48311,16 @@ Added: 2021-07-17 Prefix: da %% Type: variant +Subtag: tailo +Description: Taiwanese Hokkien Romanization System for Hokkien + languages +Description: Tâi-lô orthography/romanization +Added: 2024-03-04 +Prefix: nan-Latn +Comments: Taiwanese Hokkien Romanization System (Tâi-lô) published in + 2006 by the Taiwan Ministry of Education +%% +Type: variant Subtag: tarask Description: Belarusian in Taraskievica orthography Added: 2007-04-27 @@ -48317,6 +48384,15 @@ Comments: The most ancient dialect of Sanskrit used in verse and prose composed until about the 4th century B.C.E. %% Type: variant +Subtag: valbadia +Description: Val Badia standard of Ladin +Added: 2024-03-04 +Prefix: lld +Comments: Represents the standard written form of Ladin in the Val + Badia, unifying the three variants Marô, Mesaval and Badiot spoken + in this valley +%% +Type: variant Subtag: valencia Description: Valencian Added: 2007-03-06 diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/tlds-alpha-by-domain.txt b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/tlds-alpha-by-domain.txt index 0040762851b..6ec494c24a1 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/tlds-alpha-by-domain.txt +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/tlds-alpha-by-domain.txt @@ -1,4 +1,4 @@ -# Version 2024022700, Last Updated Tue Feb 27 07:07:02 2024 UTC +# Version 2024050800, Last Updated Wed May 8 07:07:02 2024 UTC AAA AARP ABB @@ -85,7 +85,6 @@ AUSPOST AUTHOR AUTO AUTOS -AVIANCA AW AWS AX @@ -510,7 +509,6 @@ GROUP GS GT GU -GUARDIAN GUCCI GUGE GUIDE From 32e13a9097472e7ac566bda9a264ce10b28096dd Mon Sep 17 00:00:00 2001 From: btangmu Date: Tue, 14 May 2024 13:27:00 -0400 Subject: [PATCH 2/4] CLDR-17615 Update languages (v46 SBRS), work in progress -GenerateValidityXML --- common/validity/language.xml | 8 ++++---- common/validity/variant.xml | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/validity/language.xml b/common/validity/language.xml index 8513f0e974a..64282f46b7a 100644 --- a/common/validity/language.xml +++ b/common/validity/language.xml @@ -12,7 +12,7 @@ - + aa aaa~i aak~l aan~q aas~x aaz ab aba~j abl~z aca~b acd~f ach~i ack~n acp~z @@ -76,7 +76,7 @@ cia~e cih cik cim~n cip cir ciw ciy cja cje cjh~i cjk cjm~p cjs cjv cjy ckb ckh ckl~o ckq~v ckx~z - cla clc cle clh~m clo clt~u clw cly + cla clc cle clh~m clo cls~u clw cly cma cmc cme cmg cmi cml~m cmo cmr~t cna~c cng~i cnk~l cno~q cns~u cnw~x co coa~h coj~q cot~x coz @@ -427,7 +427,7 @@ rn rnb rnd rng rnl rnn rnp rnr rnw ro roa~g rol~m roo~p ror rou row rpn rpt - rri rro rrt + rri rrm rro rrt rsb rsk~n rsw rtc rth rtm rts rtw ru rub~c rue~i ruk ruo~q rut~u ruy~z @@ -522,7 +522,7 @@ vnk vnm vnp vo vor vot vra vro vrs~t - vsi vsl vsv + vsi vsl vsn vsv vto vum~n vut vwa diff --git a/common/validity/variant.xml b/common/validity/variant.xml index c26630570dd..cd1b6e1a9d9 100644 --- a/common/validity/variant.xml +++ b/common/validity/variant.xml @@ -12,7 +12,7 @@ - + 1606nict 1694acad 1901 1959acad 1994 1996 abl1943 akuapem alalc97 aluku anpezo ao1990 aranes arkaika asante auvern baku1926 balanka barla basiceng bauddha bciav bcizbl biscayan biske blasl bohoric boont bornholm @@ -29,10 +29,10 @@ metelko monoton ndyuka nedis newfound nicard njiva nulik osojs oxendict - pahawh2~4 pamaka peano petr1708 pinyin polyton provenc puter + pahawh2~4 pamaka peano pehoeji petr1708 pinyin polyton provenc puter rigik rozaj rumgr scotland scouse simple solba sotav spanglis surmiran sursilv sutsilv synnejyl - tarask tongyong tunumiit + tailo tarask tongyong tunumiit uccor ucrcor ulster unifon vaidika valbadia valencia vallader vecdruka vivaraup wadegile From 803f3f30aff415d9001bac29a7bd16370ff18a36 Mon Sep 17 00:00:00 2001 From: btangmu Date: Tue, 14 May 2024 14:47:20 -0400 Subject: [PATCH 3/4] CLDR-17615 Update languages (v46 SBRS), work in progress -ignoreMissingAlpha3: EA, EZ, IC, UN per instructions --- .../src/main/java/org/unicode/cldr/tool/GenerateEnums.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java index d9ac36c1017..0e7aea2e0bf 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java @@ -26,6 +26,8 @@ import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; + +import jdk.internal.joptsimple.OptionSet; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; @@ -79,6 +81,8 @@ public class GenerateEnums { private Set languages = new TreeSet<>(); + final private Set ignoreMissingAlpha3 = new TreeSet<>(Arrays.asList("EA", "EZ", "IC", "UN")); + public static void main(String[] args) throws IOException { GenerateEnums gen = new GenerateEnums(); gen.showLanguageInfo(); @@ -365,7 +369,7 @@ public void loadCLDRData() throws IOException { enum_UN.put(enumName(region), region); cldrCodes.add(region); } else { - if (enum_alpha3.get(region) == null) { + if (enum_alpha3.get(region) == null && !ignoreMissingAlpha3.contains(region)) { System.out.println("Missing alpha3 for: " + region); } } From 4175861f033f75f2745d3800d5ab76f4bedb9dd0 Mon Sep 17 00:00:00 2001 From: btangmu Date: Tue, 14 May 2024 14:51:29 -0400 Subject: [PATCH 4/4] CLDR-17615 Update languages (v46 SBRS), work in progress -ignoreMissingAlpha3: EA, EZ, IC, UN per instructions --- .../src/main/java/org/unicode/cldr/tool/GenerateEnums.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java index 0e7aea2e0bf..6f828ab1a02 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java @@ -26,8 +26,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; - -import jdk.internal.joptsimple.OptionSet; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; @@ -81,7 +79,8 @@ public class GenerateEnums { private Set languages = new TreeSet<>(); - final private Set ignoreMissingAlpha3 = new TreeSet<>(Arrays.asList("EA", "EZ", "IC", "UN")); + private final Set ignoreMissingAlpha3 = + new TreeSet<>(Arrays.asList("EA", "EZ", "IC", "UN")); public static void main(String[] args) throws IOException { GenerateEnums gen = new GenerateEnums();