From b5752a5be0d490176a286a704d5ee54d95ebdb8a Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Mon, 27 May 2024 11:25:30 -0600 Subject: [PATCH 01/15] Spanish countries --- dialectid/__init__.py | 2 +- dialectid/tests/__init__.py | 21 +++++++++++++++++++++ dialectid/tests/test_utils.py | 31 +++++++++++++++++++++++++++++++ dialectid/utils.py | 31 +++++++++++++++++++++++++++++++ pyproject.toml | 5 ++++- 5 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 dialectid/tests/__init__.py create mode 100644 dialectid/tests/test_utils.py create mode 100644 dialectid/utils.py diff --git a/dialectid/__init__.py b/dialectid/__init__.py index 5372be0..f322558 100644 --- a/dialectid/__init__.py +++ b/dialectid/__init__.py @@ -20,4 +20,4 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -version = '0.0.1' \ No newline at end of file +__version__ = '0.0.1' \ No newline at end of file diff --git a/dialectid/tests/__init__.py b/dialectid/tests/__init__.py new file mode 100644 index 0000000..7bf824a --- /dev/null +++ b/dialectid/tests/__init__.py @@ -0,0 +1,21 @@ +# MIT License + +# Copyright (c) 2024 Eric Sadit Tellez Avila, Daniela Alejandra Moctezuma Ochoa, Luis Guillermo Ruiz Velazquez, Mario Graff Guerrero + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. \ No newline at end of file diff --git a/dialectid/tests/test_utils.py b/dialectid/tests/test_utils.py new file mode 100644 index 0000000..03e666f --- /dev/null +++ b/dialectid/tests/test_utils.py @@ -0,0 +1,31 @@ +# MIT License + +# Copyright (c) 2024 Eric Sadit Tellez Avila, Daniela Alejandra Moctezuma Ochoa, Luis Guillermo Ruiz Velazquez, Mario Graff Guerrero + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +from dialectid import utils + + +def test_countries(): + """Test countries""" + + es = utils.COUNTRIES['es'] + assert 'es' in es and 'mx' in es diff --git a/dialectid/utils.py b/dialectid/utils.py new file mode 100644 index 0000000..7a03d89 --- /dev/null +++ b/dialectid/utils.py @@ -0,0 +1,31 @@ +# MIT License + +# Copyright (c) 2024 Eric Sadit Tellez Avila, Daniela Alejandra Moctezuma Ochoa, Luis Guillermo Ruiz Velazquez, Mario Graff Guerrero + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +COUNTRIES = dict(es=['mx', 'cl', 'es', # Mexico (MX), Chile (CL), Spain (ES) + 'ar', 'co', 'pe', # Argentina (AR), Colombia (CO), Peru (PE) + 've', 'do', 'py', # Venezuela (VE), Dominican Republic (DO), Paraguay (PY) + 'ec', 'uy', 'cr', # Ecuador (EC), Uruguay (UY), Costa Rica (CR) + 'sv', 'pa', 'gt', # El Salvador (SV), Panama (PA), Guatemala (GT) + 'hn', 'ni', 'bo', # Honduras (HN), Nicaragua (NI), Bolivia (BO) + 'cu'] # Cuba (CU) + ) diff --git a/pyproject.toml b/pyproject.toml index 71f04e6..59551db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,4 +7,7 @@ dependencies = [ dynamic = ['version'] [tool.setuptools.dynamic] -version = {attr = 'dialectid.__version__'} \ No newline at end of file +version = {attr = 'dialectid.__version__'} + +[tool.setuptools] +packages = ['dialectid', 'dialectid.tests'] \ No newline at end of file From f3896dbdb73cc9b7d4ab262ef489450247df4bb0 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Mon, 27 May 2024 11:28:23 -0600 Subject: [PATCH 02/15] Actions --- .github/workflows/test.yaml | 2 +- .gitignore | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 56a1dc9..404f24e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Set up Python - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: test auto-update-conda: true diff --git a/.gitignore b/.gitignore index 38eeeeb..4477911 100644 --- a/.gitignore +++ b/.gitignore @@ -160,4 +160,5 @@ cython_debug/ #.idea/ /.quarto/ -/_site/ \ No newline at end of file +/_site/ +.vscode/settings.json From bb3ce96a3e24ec1b1c733d7678838db9bc8368e1 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Tue, 28 May 2024 10:09:50 -0600 Subject: [PATCH 03/15] English, Arabic, and German --- dialectid/tests/test_utils.py | 6 ++++++ dialectid/utils.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/dialectid/tests/test_utils.py b/dialectid/tests/test_utils.py index 03e666f..0c66f90 100644 --- a/dialectid/tests/test_utils.py +++ b/dialectid/tests/test_utils.py @@ -29,3 +29,9 @@ def test_countries(): es = utils.COUNTRIES['es'] assert 'es' in es and 'mx' in es + en = utils.COUNTRIES['en'] + assert 'us' in en and 'zw' in en + ar = utils.COUNTRIES['ar'] + assert 'ye' in ar and 'so' in ar + de = utils.COUNTRIES['de'] + assert 'de' in de and 'ch' in de diff --git a/dialectid/utils.py b/dialectid/utils.py index 7a03d89..e64a190 100644 --- a/dialectid/utils.py +++ b/dialectid/utils.py @@ -27,5 +27,38 @@ 'ec', 'uy', 'cr', # Ecuador (EC), Uruguay (UY), Costa Rica (CR) 'sv', 'pa', 'gt', # El Salvador (SV), Panama (PA), Guatemala (GT) 'hn', 'ni', 'bo', # Honduras (HN), Nicaragua (NI), Bolivia (BO) - 'cu'] # Cuba (CU) + 'cu' # Cuba (CU) + ], + en=['ai', 'ag', 'au', # Anguilla, Antigua and Barbuda, Australia + 'bs', 'bb', 'bz', # Bahamas, Barbados, Belize + 'bm', 'vg', 'cm', # Bermuda, British Virgin Islands, Cameroon + 'ca', 'ky', 'ck', # Canada, Cayman Islands, Cook Islands + 'dm', 'sz', 'fk', # Dominica, Eswatini, Falkland Islands + 'fj', 'gm', 'gz', # Fiji, Gambia, Ghana + 'gi', 'gd', 'gu', # Gibraltar, Grenada, Guam + 'gg', 'gy', 'in', # Guernsey, Guyana, India + 'ie', 'im', 'jm', # Ireland, Isle of Man, Jamaica + 'ke', 'ls', 'lr', # Kenya, Lesotho, Liberia + 'mw', 'mt', 'mu', # Malawi, Malta, Mauritius + 'fm', 'na', 'nz', # Micronesia, Namibia, New Zealand + 'ng', 'mp', 'pk', # Nigeria, Northern Mariana Islands, Pakistan + 'pw', 'pg', 'ph', # Palau, Papua New Guinea, Philippines + 'rw', 'sh', 'kn', # Rwanda, Saint Helena, Ascension, and Tristan da Cunha, Saint Kitts and Nevis + 'lc', 'vc', 'sl', # Saint Lucia, Saint Vincent and the Grenadines, Sierra Leone + 'sg', 'sx', 'sb', # Singapore, Sint Maarten, Solomon Islands + 'za', 'sd', 'to', # South Africa, Sudan, Tonga + 'tt', 'tc', 'ug', # Trinidad y Tobago, Turks and Caicos Islands, Uganda + 'gb', 'us', 'vu', # United Kingdom, United States, Vanuatu + 'vg', 'vi', 'zm', # Virgin Islands (GB), Virgin Islands (US), Zambia + 'zw' # Zimbabwe + ], + ar=['dz', 'bh', 'td', # Algeria, Bahrain, Chad + 'dj', 'eg', 'iq', # Djibouti, Egypt, Iraq + 'jo', 'kw', 'lb', # Jordan, Kuwait, Lebanon, + 'ly', 'mr', 'ma', # Libya, Mauritania, Morocco + 'om', 'qa', 'sa', # Oman, Qatar, Saudi Arabia + 'so', 'sd', 'sy', # Somalia, Sudan, Syria + 'tn', 'ae', 'ye', # Tunisia, United Arab Emirates, Yemen + ], + de=['at', 'de', 'ch'], # Austria, Germany, Switzerland ) From ac35f898c0d48eb576b9761caf128778ae61c4e6 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Tue, 28 May 2024 11:10:20 -0600 Subject: [PATCH 04/15] URL --- dialectid/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dialectid/tests/test_utils.py b/dialectid/tests/test_utils.py index 0c66f90..f6c68c5 100644 --- a/dialectid/tests/test_utils.py +++ b/dialectid/tests/test_utils.py @@ -19,7 +19,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - +# https://www.cia.gov/the-world-factbook/about/archives/2021/field/languages/ from dialectid import utils From a73f7afd28cd0e2cbb7818f82cf81ca50b465d7b Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Tue, 28 May 2024 11:16:02 -0600 Subject: [PATCH 05/15] Spanish --- quarto/data/es-recall.csv | 462 +++++--------------------------------- 1 file changed, 57 insertions(+), 405 deletions(-) diff --git a/quarto/data/es-recall.csv b/quarto/data/es-recall.csv index 4e0f2e1..68daba9 100644 --- a/quarto/data/es-recall.csv +++ b/quarto/data/es-recall.csv @@ -1,406 +1,58 @@ ,Recall,Country,Training Size -0,0.3913920645595158,pr,4096 -1,0.556640625,cr,4096 -2,0.492431640625,tr,4096 -3,0.6356275303643725,lk,4096 -4,0.6304868316041501,tw,4096 -5,0.560546875,ca,4096 -6,0.5198019801980198,om,4096 -7,0.376865671641791,am,4096 -8,0.63232421875,cl,4096 -9,0.6141732283464567,ml,4096 -10,0.46865443425076453,vn,4096 -11,0.4085680285600952,ru,4096 -12,0.60595703125,ni,4096 -13,0.4716981132075472,bh,4096 -14,0.58837890625,gt,4096 -15,0.5174679008659301,th,4096 -16,0.5163710777626194,ua,4096 -17,0.599609375,pe,4096 -18,0.9427083333333334,bw,4096 -19,0.3277972027972028,cy,4096 -20,0.5707762557077626,cm,4096 -21,0.8297730307076101,tz,4096 -22,0.7806691449814126,ir,4096 -23,0.60986328125,pl,4096 -24,0.565185546875,be,4096 -25,0.4667487684729064,cw,4096 -26,0.7240618101545254,ug,4096 -27,0.646728515625,do,4096 -28,0.583740234375,ch,4096 -29,0.4526112185686654,et,4096 -30,0.44126984126984126,la,4096 -31,0.6717961654894047,ph,4096 -32,0.57861328125,de,4096 -33,0.5141108466507991,cz,4096 -34,0.486083984375,hu,4096 -35,0.629638671875,ar,4096 -36,0.84375,zw,4096 -37,0.55,mv,4096 -38,0.5869565217391305,mc,4096 -39,0.6240234375,jp,4096 -40,0.5393180236604036,aw,4096 -41,0.44775390625,pt,4096 -42,0.5546875,us,4096 -43,0.6872498570611778,za,4096 -44,0.8059701492537313,sc,4096 -45,0.6301369863013698,ci,4096 -46,0.7808219178082192,bj,4096 -47,0.543212890625,ad,4096 -48,0.6149614961496149,bd,4096 -49,0.6959287531806616,si,4096 -50,0.5363636363636364,bs,4096 -51,0.592041015625,hn,4096 -52,0.616943359375,mx,4096 -53,0.683837890625,cu,4096 -54,0.5644329896907216,sa,4096 -55,0.8629825889477668,va,4096 -56,0.47119741100323626,is,4096 -57,0.47198641765704585,ee,4096 -58,0.5703839122486288,ht,4096 -59,0.7924246199850485,in,4096 -60,0.5962484624846248,bg,4096 -61,0.552099533437014,bz,4096 -62,0.472636815920398,iq,4096 -63,0.28378378378378377,ge,4096 -64,0.5551497443389335,lu,4096 -65,0.6947368421052632,ye,4096 -66,0.5224609375,bo,4096 -67,0.35944700460829493,me,4096 -68,0.58935546875,dk,4096 -69,0.676025390625,es,4096 -70,0.4391332058636074,sg,4096 -71,0.7706576728499157,gh,4096 -72,0.5752427184466019,hr,4096 -73,0.5546875,fr,4096 -74,0.5743412797992472,fi,4096 -75,0.5121365697519339,gr,4096 -76,0.48880597014925375,tn,4096 -77,0.610107421875,uy,4096 -78,0.635,xk,4096 -79,0.8579610538373424,ng,4096 -80,0.562744140625,br,4096 -81,0.638916015625,qa,4096 -82,0.510009765625,at,4096 -83,0.6706642066420664,mz,4096 -84,0.5221987315010571,tt,4096 -85,0.559814453125,nz,4096 -86,0.5007267441860465,eg,4096 -87,0.585205078125,no,4096 -88,0.613037109375,sv,4096 -89,0.63330078125,ve,4096 -90,0.4954128440366973,kh,4096 -91,0.3992740471869328,rs,4096 -92,0.44553072625698326,cv,4096 -93,0.5712890625,pa,4096 -94,0.6075778078484438,sn,4096 -95,0.7699055330634278,ke,4096 -96,0.59423828125,ec,4096 -97,0.6869918699186992,ao,4096 -98,0.5348432055749129,dz,4096 -99,0.8878048780487805,pk,4096 -100,0.56396484375,ie,4096 -101,0.5139344262295082,my,4096 -102,0.611328125,kr,4096 -103,0.5561643835616439,jo,4096 -104,0.47201492537313433,al,4096 -105,0.476318359375,it,4096 -106,0.5915492957746479,kw,4096 -107,0.45981387478849406,ro,4096 -108,0.56982421875,nl,4096 -109,0.5588235294117647,lv,4096 -110,0.5969042244437278,cn,4096 -111,0.5234375,mt,4096 -112,0.604248046875,co,4096 -113,0.4733893557422969,gi,4096 -114,0.47570332480818417,ba,4096 -115,0.5684931506849316,mo,4096 -116,0.5961538461538461,ky,4096 -117,0.6227709190672154,hk,4096 -118,0.638916015625,gq,4096 -119,0.5588323803791755,ma,4096 -120,0.5020746887966805,kz,4096 -121,0.6015625,py,4096 -122,0.5654296875,au,4096 -123,0.552734375,se,4096 -124,0.4577114427860697,mg,4096 -125,0.6024242424242424,id,4096 -126,0.47900390625,ae,4096 -127,0.5236686390532544,lb,4096 -128,0.57470703125,il,4096 -129,0.560791015625,gb,4096 -130,0.5294725956566702,sk,4096 -131,0.6439024390243903,cd,4096 -132,0.4908616187989556,np,4096 -133,0.4717800289435601,jm,4096 -134,0.4886240520043337,lt,4096 -135,0.42636180228648285,pr,8192 -136,0.57470703125,cr,8192 -137,0.511962890625,tr,8192 -138,0.6761133603238867,lk,8192 -139,0.6536312849162011,tw,8192 -140,0.576904296875,ca,8192 -141,0.5544554455445545,om,8192 -142,0.39925373134328357,am,8192 -143,0.6572265625,cl,8192 -144,0.6141732283464567,ml,8192 -145,0.5022935779816514,vn,8192 -146,0.4545815152717176,ru,8192 -147,0.63037109375,ni,8192 -148,0.46750524109014674,bh,8192 -149,0.610595703125,gt,8192 -150,0.5455359808898178,th,8192 -151,0.5368349249658936,ua,8192 -152,0.62548828125,pe,8192 -153,0.9427083333333334,bw,8192 -154,0.3811188811188811,cy,8192 -155,0.5707762557077626,cm,8192 -156,0.835781041388518,tz,8192 -157,0.8066914498141264,ir,8192 -158,0.630615234375,pl,8192 -159,0.576904296875,be,8192 -160,0.4975369458128079,cw,8192 -161,0.7262693156732892,ug,8192 -162,0.6748046875,do,8192 -163,0.599365234375,ch,8192 -164,0.4526112185686654,et,8192 -165,0.44126984126984126,la,8192 -166,0.6745711402623612,ph,8192 -167,0.59521484375,de,8192 -168,0.5372322339340361,cz,8192 -169,0.5234375,hu,8192 -170,0.658447265625,ar,8192 -171,0.84375,zw,8192 -172,0.5576923076923077,mv,8192 -173,0.6141304347826086,mc,8192 -174,0.633544921875,jp,8192 -175,0.5629784272790536,aw,8192 -176,0.45703125,pt,8192 -177,0.576416015625,us,8192 -178,0.6883933676386507,za,8192 -179,0.8059701492537313,sc,8192 -180,0.639269406392694,ci,8192 -181,0.7808219178082192,bj,8192 -182,0.56494140625,ad,8192 -183,0.6259625962596259,bd,8192 -184,0.7175572519083969,si,8192 -185,0.5113636363636364,bs,8192 -186,0.623046875,hn,8192 -187,0.62744140625,mx,8192 -188,0.706787109375,cu,8192 -189,0.5790378006872853,sa,8192 -190,0.8599545798637396,va,8192 -191,0.47766990291262135,is,8192 -192,0.5178268251273345,ee,8192 -193,0.5795246800731262,ht,8192 -194,0.8063792673810117,in,8192 -195,0.656519065190652,bg,8192 -196,0.5878693623639192,bz,8192 -197,0.527363184079602,iq,8192 -198,0.3952702702702703,ge,8192 -199,0.5595325054784515,lu,8192 -200,0.6947368421052632,ye,8192 -201,0.545654296875,bo,8192 -202,0.37327188940092165,me,8192 -203,0.61376953125,dk,8192 -204,0.71142578125,es,8192 -205,0.4710006373486297,sg,8192 -206,0.7976391231028668,gh,8192 -207,0.5995145631067961,hr,8192 -208,0.561767578125,fr,8192 -209,0.5790464240903388,fi,8192 -210,0.5270738863696985,gr,8192 -211,0.5,tn,8192 -212,0.645751953125,uy,8192 -213,0.66,xk,8192 -214,0.8591065292096219,ng,8192 -215,0.573486328125,br,8192 -216,0.650390625,qa,8192 -217,0.52294921875,at,8192 -218,0.7297047970479705,mz,8192 -219,0.5137420718816068,tt,8192 -220,0.571044921875,nz,8192 -221,0.5039970930232558,eg,8192 -222,0.596435546875,no,8192 -223,0.6318359375,sv,8192 -224,0.650146484375,ve,8192 -225,0.4908256880733945,kh,8192 -226,0.44283121597096187,rs,8192 -227,0.44553072625698326,cv,8192 -228,0.601806640625,pa,8192 -229,0.6292286874154263,sn,8192 -230,0.7887989203778677,ke,8192 -231,0.6015625,ec,8192 -232,0.7317073170731707,ao,8192 -233,0.5313588850174216,dz,8192 -234,0.8804878048780488,pk,8192 -235,0.59765625,ie,8192 -236,0.5385245901639344,my,8192 -237,0.630615234375,kr,8192 -238,0.5534246575342465,jo,8192 -239,0.4944029850746269,al,8192 -240,0.4990234375,it,8192 -241,0.6267605633802817,kw,8192 -242,0.47419627749576987,ro,8192 -243,0.591552734375,nl,8192 -244,0.5735294117647058,lv,8192 -245,0.6223798774588842,cn,8192 -246,0.56494140625,mt,8192 -247,0.63330078125,co,8192 -248,0.49019607843137253,gi,8192 -249,0.44501278772378516,ba,8192 -250,0.5684931506849316,mo,8192 -251,0.6188811188811189,ky,8192 -252,0.6598079561042524,hk,8192 -253,0.668701171875,gq,8192 -254,0.5943424616310563,ma,8192 -255,0.5103734439834025,kz,8192 -256,0.63671875,py,8192 -257,0.5712890625,au,8192 -258,0.5693359375,se,8192 -259,0.4577114427860697,mg,8192 -260,0.6236363636363637,id,8192 -261,0.48779296875,ae,8192 -262,0.6153846153846154,lb,8192 -263,0.60546875,il,8192 -264,0.583740234375,gb,8192 -265,0.5584281282316442,sk,8192 -266,0.624390243902439,cd,8192 -267,0.4830287206266319,np,8192 -268,0.5050651230101303,jm,8192 -269,0.5113759479956663,lt,8192 -270,0.4593140551445864,pr,16384 -271,0.593017578125,cr,16384 -272,0.5322265625,tr,16384 -273,0.6842105263157895,lk,16384 -274,0.6763766959297686,tw,16384 -275,0.584228515625,ca,16384 -276,0.5544554455445545,om,16384 -277,0.40671641791044777,am,16384 -278,0.6708984375,cl,16384 -279,0.6141732283464567,ml,16384 -280,0.49770642201834864,vn,16384 -281,0.4049980166600555,ru,16384 -282,0.648193359375,ni,16384 -283,0.46750524109014674,bh,16384 -284,0.628662109375,gt,16384 -285,0.5742012541057032,th,16384 -286,0.5450204638472033,ua,16384 -287,0.63818359375,pe,16384 -288,0.9427083333333334,bw,16384 -289,0.41346153846153844,cy,16384 -290,0.5707762557077626,cm,16384 -291,0.8544726301735648,tz,16384 -292,0.8066914498141264,ir,16384 -293,0.64208984375,pl,16384 -294,0.60498046875,be,16384 -295,0.5073891625615764,cw,16384 -296,0.7262693156732892,ug,16384 -297,0.69970703125,do,16384 -298,0.6220703125,ch,16384 -299,0.4526112185686654,et,16384 -300,0.44126984126984126,la,16384 -301,0.6831483350151363,ph,16384 -302,0.59765625,de,16384 -303,0.5453927235634138,cz,16384 -304,0.549072265625,hu,16384 -305,0.688232421875,ar,16384 -306,0.84375,zw,16384 -307,0.5576923076923077,mv,16384 -308,0.6141304347826086,mc,16384 -309,0.6513671875,jp,16384 -310,0.5720250521920668,aw,16384 -311,0.4853515625,pt,16384 -312,0.56591796875,us,16384 -313,0.7044025157232704,za,16384 -314,0.8059701492537313,sc,16384 -315,0.639269406392694,ci,16384 -316,0.7808219178082192,bj,16384 -317,0.584716796875,ad,16384 -318,0.6259625962596259,bd,16384 -319,0.7213740458015268,si,16384 -320,0.5068181818181818,bs,16384 -321,0.641845703125,hn,16384 -322,0.65283203125,mx,16384 -323,0.7314453125,cu,16384 -324,0.5967926689576174,sa,16384 -325,0.8531415594246783,va,16384 -326,0.47896440129449835,is,16384 -327,0.5280135823429541,ee,16384 -328,0.5941499085923218,ht,16384 -329,0.809369548965861,in,16384 -330,0.6783517835178352,bg,16384 -331,0.5878693623639192,bz,16384 -332,0.527363184079602,iq,16384 -333,0.4222972972972973,ge,16384 -334,0.5558802045288532,lu,16384 -335,0.6947368421052632,ye,16384 -336,0.575439453125,bo,16384 -337,0.37327188940092165,me,16384 -338,0.633056640625,dk,16384 -339,0.734375,es,16384 -340,0.48119821542383684,sg,16384 -341,0.7892074198988196,gh,16384 -342,0.5946601941747572,hr,16384 -343,0.577392578125,fr,16384 -344,0.6066499372647428,fi,16384 -345,0.5433448919711923,gr,16384 -346,0.5074626865671642,tn,16384 -347,0.663818359375,uy,16384 -348,0.66,xk,16384 -349,0.856815578465063,ng,16384 -350,0.57470703125,br,16384 -351,0.666015625,qa,16384 -352,0.530029296875,at,16384 -353,0.738929889298893,mz,16384 -354,0.48414376321353064,tt,16384 -355,0.56884765625,nz,16384 -356,0.5101744186046512,eg,16384 -357,0.611328125,no,16384 -358,0.654052734375,sv,16384 -359,0.677978515625,ve,16384 -360,0.5,kh,16384 -361,0.47186932849364793,rs,16384 -362,0.44553072625698326,cv,16384 -363,0.632080078125,pa,16384 -364,0.6292286874154263,sn,16384 -365,0.7995951417004049,ke,16384 -366,0.6240234375,ec,16384 -367,0.7317073170731707,ao,16384 -368,0.5505226480836237,dz,16384 -369,0.875609756097561,pk,16384 -370,0.60498046875,ie,16384 -371,0.5729508196721311,my,16384 -372,0.642333984375,kr,16384 -373,0.5589041095890411,jo,16384 -374,0.4925373134328358,al,16384 -375,0.517333984375,it,16384 -376,0.6056338028169014,kw,16384 -377,0.49323181049069376,ro,16384 -378,0.615966796875,nl,16384 -379,0.5735294117647058,lv,16384 -380,0.6304417929700097,cn,16384 -381,0.587646484375,mt,16384 -382,0.64892578125,co,16384 -383,0.5154061624649859,gi,16384 -384,0.4629156010230179,ba,16384 -385,0.5684931506849316,mo,16384 -386,0.6258741258741258,ky,16384 -387,0.6460905349794238,hk,16384 -388,0.683837890625,gq,16384 -389,0.5961480589828468,ma,16384 -390,0.5103734439834025,kz,16384 -391,0.6708984375,py,16384 -392,0.5849609375,au,16384 -393,0.578857421875,se,16384 -394,0.4577114427860697,mg,16384 -395,0.6306060606060606,id,16384 -396,0.51318359375,ae,16384 -397,0.6420118343195266,lb,16384 -398,0.63232421875,il,16384 -399,0.58837890625,gb,16384 -400,0.6111685625646329,sk,16384 -401,0.624390243902439,cd,16384 -402,0.4830287206266319,np,16384 -403,0.5354558610709117,jm,16384 -404,0.5417118093174431,lt,16384 +0,0.563720703125,cr,4096 +1,0.62939453125,cl,4096 +2,0.610107421875,ni,4096 +3,0.585693359375,gt,4096 +4,0.60205078125,pe,4096 +5,0.650634765625,do,4096 +6,0.63232421875,ar,4096 +7,0.589599609375,hn,4096 +8,0.625732421875,mx,4096 +9,0.68896484375,cu,4096 +10,0.51708984375,bo,4096 +11,0.680419921875,es,4096 +12,0.609375,uy,4096 +13,0.60595703125,sv,4096 +14,0.63525390625,ve,4096 +15,0.5791015625,pa,4096 +16,0.598388671875,ec,4096 +17,0.601318359375,co,4096 +18,0.60302734375,py,4096 +19,0.5791015625,cr,8192 +20,0.65625,cl,8192 +21,0.624755859375,ni,8192 +22,0.6103515625,gt,8192 +23,0.62646484375,pe,8192 +24,0.673828125,do,8192 +25,0.656005859375,ar,8192 +26,0.620361328125,hn,8192 +27,0.638427734375,mx,8192 +28,0.7060546875,cu,8192 +29,0.541748046875,bo,8192 +30,0.716552734375,es,8192 +31,0.642333984375,uy,8192 +32,0.620849609375,sv,8192 +33,0.64990234375,ve,8192 +34,0.59912109375,pa,8192 +35,0.6015625,ec,8192 +36,0.626953125,co,8192 +37,0.63623046875,py,8192 +38,0.59521484375,cr,16384 +39,0.670166015625,cl,16384 +40,0.647705078125,ni,16384 +41,0.63134765625,gt,16384 +42,0.639892578125,pe,16384 +43,0.700927734375,do,16384 +44,0.680908203125,ar,16384 +45,0.639404296875,hn,16384 +46,0.660888671875,mx,16384 +47,0.737060546875,cu,16384 +48,0.569580078125,bo,16384 +49,0.7421875,es,16384 +50,0.666015625,uy,16384 +51,0.654541015625,sv,16384 +52,0.67138671875,ve,16384 +53,0.6298828125,pa,16384 +54,0.623046875,ec,16384 +55,0.648681640625,co,16384 +56,0.666259765625,py,16384 From 4a3dafdece5c90f224f7d260fdae2d260f99d48e Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Tue, 28 May 2024 21:44:55 -0600 Subject: [PATCH 06/15] French --- dialectid/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dialectid/utils.py b/dialectid/utils.py index e64a190..13bdc78 100644 --- a/dialectid/utils.py +++ b/dialectid/utils.py @@ -19,6 +19,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +# https://www.cia.gov/the-world-factbook/about/archives/2021/field/languages/ COUNTRIES = dict(es=['mx', 'cl', 'es', # Mexico (MX), Chile (CL), Spain (ES) @@ -58,7 +59,17 @@ 'ly', 'mr', 'ma', # Libya, Mauritania, Morocco 'om', 'qa', 'sa', # Oman, Qatar, Saudi Arabia 'so', 'sd', 'sy', # Somalia, Sudan, Syria - 'tn', 'ae', 'ye', # Tunisia, United Arab Emirates, Yemen + 'tn', 'ae', 'ye' # Tunisia, United Arab Emirates, Yemen ], de=['at', 'de', 'ch'], # Austria, Germany, Switzerland + fr=['be', 'bj', 'bf', # Belgium, Benin, Burkina Faso + 'cm', 'ca', 'cf', # Cameroon, Canada, Central African Republic + 'td', 'km', 'cd', # Chad, Comoros, Congo (Republic) + 'cg', 'cl', 'dj', # Congo, Cote d'lvoire, Djibouti + 'fr', 'pf', 'ga', # France, French Polynesia, Gabon + 'gn', 'ht', 'lu', # Guinea, Haiti, Luxembourg + 'ml', 'mc', 'nc', # Mali, Monaco, New Caledonia + 'ne', 'rw', 'sn', # Niger, Rwanda, Senegal + 'ch', 'tg' # Switzerland, Togo + ] ) From 8e85a237f6adb4a9cb987b78e09f63dd37f551e4 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Tue, 28 May 2024 22:01:37 -0600 Subject: [PATCH 07/15] Arabic, English --- quarto/data/ar-recall.csv | 342 +++------------ quarto/data/en-recall.csv | 861 ++++++++------------------------------ 2 files changed, 249 insertions(+), 954 deletions(-) diff --git a/quarto/data/ar-recall.csv b/quarto/data/ar-recall.csv index 818bb81..776b7cd 100644 --- a/quarto/data/ar-recall.csv +++ b/quarto/data/ar-recall.csv @@ -1,280 +1,64 @@ ,Recall,Country,Training Size -0,0.6842105263157895,fi,4096 -1,0.724609375,lb,4096 -2,0.595460614152203,za,4096 -3,0.6666666666666666,uz,4096 -4,0.574462890625,gb,4096 -5,0.681640625,nl,4096 -6,0.632080078125,sd,4096 -7,0.7602941176470588,ar,4096 -8,0.7880859375,ma,4096 -9,0.5508274231678487,ba,4096 -10,0.552978515625,tr,4096 -11,0.589111328125,jo,4096 -12,0.5222052067381318,pt,4096 -13,0.696044921875,iq,4096 -14,0.724709784411277,mr,4096 -15,0.746826171875,dz,4096 -16,0.7439897698209719,ir,4096 -17,0.5260273972602739,ml,4096 -18,0.59033203125,es,4096 -19,0.6200726141078838,de,4096 -20,0.7465483234714004,hu,4096 -21,0.6087114337568058,my,4096 -22,0.7425249169435216,bd,4096 -23,0.503448275862069,cd,4096 -24,0.5701513067400276,jp,4096 -25,0.552734375,qa,4096 -26,0.6197718631178707,cz,4096 -27,0.592529296875,it,4096 -28,0.7129337539432177,mx,4096 -29,0.6393321875767247,sy,4096 -30,0.727705112960761,ke,4096 -31,0.4604591836734694,gr,4096 -32,0.7304843304843305,cn,4096 -33,0.60009765625,bh,4096 -34,0.4294117647058823,kz,4096 -35,0.695556640625,sa,4096 -36,0.6459459459459459,lk,4096 -37,0.81,tw,4096 -38,0.6640625,ca,4096 -39,0.675537109375,om,4096 -40,0.9494423791821561,vn,4096 -41,0.5275330396475771,ru,4096 -42,0.7152103559870551,dj,4096 -43,0.56494140625,th,4096 -44,0.6347305389221557,ua,4096 -45,0.5607843137254902,cy,4096 -46,0.7058823529411765,so,4096 -47,0.5224913494809689,tz,4096 -48,0.7493589743589744,pl,4096 -49,0.60888671875,be,4096 -50,0.45027624309392267,ug,4096 -51,0.6278836509528586,az,4096 -52,0.571044921875,ch,4096 -53,0.7104795737122558,et,4096 -54,0.6651718983557549,ph,4096 -55,0.571969696969697,mv,4096 -56,0.58935546875,ly,4096 -57,0.5576171875,us,4096 -58,0.5480943738656987,ci,4096 -59,0.7394783639596918,in,4096 -60,0.5,ge,4096 -61,0.688720703125,ye,4096 -62,0.669971671388102,td,4096 -63,0.5406189555125726,dk,4096 -64,0.7792207792207793,af,4096 -65,0.5150214592274678,sg,4096 -66,0.5486577181208053,gh,4096 -67,0.53857421875,fr,4096 -68,0.6787109375,tn,4096 -69,0.7776230269266481,ng,4096 -70,0.7460159362549801,br,4096 -71,0.62255859375,at,4096 -72,0.6847133757961783,nz,4096 -73,0.646728515625,eg,4096 -74,0.5777407253022092,no,4096 -75,0.8794326241134752,ve,4096 -76,0.6318840579710145,rs,4096 -77,0.7109737248840804,sn,4096 -78,0.3880597014925373,ec,4096 -79,0.5364238410596026,mu,4096 -80,0.8397044651461613,pk,4096 -81,0.5086461408688318,ie,4096 -82,0.5684803001876173,kr,4096 -83,0.693359375,kw,4096 -84,0.5652173913043478,ro,4096 -85,0.6959459459459459,ne,4096 -86,0.32116788321167883,mt,4096 -87,0.6806640625,au,4096 -88,0.5890485226348604,se,4096 -89,0.7000503271263211,id,4096 -90,0.567626953125,ae,4096 -91,0.6251780119624039,il,4096 -92,0.4853801169590643,sk,4096 -93,0.6912280701754386,fi,8192 -94,0.738037109375,lb,8192 -95,0.6181575433911882,za,8192 -96,0.6666666666666666,uz,8192 -97,0.578125,gb,8192 -98,0.6669921875,nl,8192 -99,0.646728515625,sd,8192 -100,0.7602941176470588,ar,8192 -101,0.792236328125,ma,8192 -102,0.5650118203309693,ba,8192 -103,0.558349609375,tr,8192 -104,0.594970703125,jo,8192 -105,0.5076569678407351,pt,8192 -106,0.710205078125,iq,8192 -107,0.7385295743504698,mr,8192 -108,0.769287109375,dz,8192 -109,0.759079283887468,ir,8192 -110,0.5260273972602739,ml,8192 -111,0.608154296875,es,8192 -112,0.6231846473029046,de,8192 -113,0.7416173570019724,hu,8192 -114,0.6254083484573503,my,8192 -115,0.7574750830564784,bd,8192 -116,0.4827586206896552,cd,8192 -117,0.5914718019257221,jp,8192 -118,0.56787109375,qa,8192 -119,0.6216730038022814,cz,8192 -120,0.611083984375,it,8192 -121,0.7160883280757098,mx,8192 -122,0.6599558065308126,sy,8192 -123,0.7395957193816884,ke,8192 -124,0.4732142857142857,gr,8192 -125,0.750997150997151,cn,8192 -126,0.621826171875,bh,8192 -127,0.4294117647058823,kz,8192 -128,0.702392578125,sa,8192 -129,0.6418918918918919,lk,8192 -130,0.81,tw,8192 -131,0.67138671875,ca,8192 -132,0.69775390625,om,8192 -133,0.9494423791821561,vn,8192 -134,0.5440528634361234,ru,8192 -135,0.7152103559870551,dj,8192 -136,0.559814453125,th,8192 -137,0.6347305389221557,ua,8192 -138,0.5882352941176471,cy,8192 -139,0.7005347593582888,so,8192 -140,0.5294117647058824,tz,8192 -141,0.7615384615384615,pl,8192 -142,0.63623046875,be,8192 -143,0.45027624309392267,ug,8192 -144,0.6108324974924775,az,8192 -145,0.59130859375,ch,8192 -146,0.7335701598579041,et,8192 -147,0.6965620328849028,ph,8192 -148,0.5946969696969697,mv,8192 -149,0.620361328125,ly,8192 -150,0.561767578125,us,8192 -151,0.5662431941923775,ci,8192 -152,0.7374036751630113,in,8192 -153,0.4975609756097561,ge,8192 -154,0.706298828125,ye,8192 -155,0.6671388101983002,td,8192 -156,0.5725338491295938,dk,8192 -157,0.784992784992785,af,8192 -158,0.47639484978540775,sg,8192 -159,0.5486577181208053,gh,8192 -160,0.560302734375,fr,8192 -161,0.685546875,tn,8192 -162,0.7901578458681523,ng,8192 -163,0.7649402390438247,br,8192 -164,0.63427734375,at,8192 -165,0.7054140127388535,nz,8192 -166,0.6552734375,eg,8192 -167,0.6085869112130055,no,8192 -168,0.8794326241134752,ve,8192 -169,0.6318840579710145,rs,8192 -170,0.7125193199381762,sn,8192 -171,0.5373134328358209,ec,8192 -172,0.5496688741721855,mu,8192 -173,0.8522325730806296,pk,8192 -174,0.5128637705609448,ie,8192 -175,0.5675422138836773,kr,8192 -176,0.69677734375,kw,8192 -177,0.5570652173913043,ro,8192 -178,0.6959459459459459,ne,8192 -179,0.34306569343065696,mt,8192 -180,0.694580078125,au,8192 -181,0.6082949308755761,se,8192 -182,0.7075993960744842,id,8192 -183,0.583251953125,ae,8192 -184,0.6428367986328681,il,8192 -185,0.5146198830409356,sk,8192 -186,0.7368421052631579,fi,16384 -187,0.74267578125,lb,16384 -188,0.6154873164218959,za,16384 -189,0.6666666666666666,uz,16384 -190,0.589599609375,gb,16384 -191,0.66259765625,nl,16384 -192,0.666015625,sd,16384 -193,0.7602941176470588,ar,16384 -194,0.79345703125,ma,16384 -195,0.5650118203309693,ba,16384 -196,0.58251953125,tr,16384 -197,0.607177734375,jo,16384 -198,0.5076569678407351,pt,16384 -199,0.716796875,iq,16384 -200,0.7473742399115534,mr,16384 -201,0.772705078125,dz,16384 -202,0.7736572890025576,ir,16384 -203,0.5260273972602739,ml,16384 -204,0.60888671875,es,16384 -205,0.6154045643153527,de,16384 -206,0.7445759368836292,hu,16384 -207,0.6460980036297641,my,16384 -208,0.7574750830564784,bd,16384 -209,0.4827586206896552,cd,16384 -210,0.6121045392022009,jp,16384 -211,0.5693359375,qa,16384 -212,0.596958174904943,cz,16384 -213,0.60400390625,it,16384 -214,0.7160883280757098,mx,16384 -215,0.6646206727228088,sy,16384 -216,0.7395957193816884,ke,16384 -217,0.49489795918367346,gr,16384 -218,0.7173789173789173,cn,16384 -219,0.618896484375,bh,16384 -220,0.4294117647058823,kz,16384 -221,0.72607421875,sa,16384 -222,0.6459459459459459,lk,16384 -223,0.81,tw,16384 -224,0.681884765625,ca,16384 -225,0.71533203125,om,16384 -226,0.9494423791821561,vn,16384 -227,0.5631424375917768,ru,16384 -228,0.7152103559870551,dj,16384 -229,0.5673828125,th,16384 -230,0.6407185628742516,ua,16384 -231,0.5843137254901961,cy,16384 -232,0.7005347593582888,so,16384 -233,0.5294117647058824,tz,16384 -234,0.7653846153846153,pl,16384 -235,0.616455078125,be,16384 -236,0.45027624309392267,ug,16384 -237,0.6078234704112337,az,16384 -238,0.601318359375,ch,16384 -239,0.7335701598579041,et,16384 -240,0.7219730941704036,ph,16384 -241,0.5946969696969697,mv,16384 -242,0.655029296875,ly,16384 -243,0.591552734375,us,16384 -244,0.5317604355716878,ci,16384 -245,0.7341434499110847,in,16384 -246,0.5073170731707317,ge,16384 -247,0.718994140625,ye,16384 -248,0.6671388101983002,td,16384 -249,0.5812379110251451,dk,16384 -250,0.784992784992785,af,16384 -251,0.47639484978540775,sg,16384 -252,0.5486577181208053,gh,16384 -253,0.563232421875,fr,16384 -254,0.68896484375,tn,16384 -255,0.7985143918291551,ng,16384 -256,0.7689243027888446,br,16384 -257,0.645263671875,at,16384 -258,0.6480891719745223,nz,16384 -259,0.671630859375,eg,16384 -260,0.6202584410170905,no,16384 -261,0.8794326241134752,ve,16384 -262,0.6318840579710145,rs,16384 -263,0.7109737248840804,sn,16384 -264,0.5671641791044776,ec,16384 -265,0.5496688741721855,mu,16384 -266,0.8490202377128172,pk,16384 -267,0.5166596372838465,ie,16384 -268,0.600375234521576,kr,16384 -269,0.7060546875,kw,16384 -270,0.5760869565217391,ro,16384 -271,0.6959459459459459,ne,16384 -272,0.34306569343065696,mt,16384 -273,0.705078125,au,16384 -274,0.6245595012198428,se,16384 -275,0.7048314041268243,id,16384 -276,0.591796875,ae,16384 -277,0.6542295642267161,il,16384 -278,0.5146198830409356,sk,16384 +0,0.73388671875,lb,4096 +1,0.640625,sd,4096 +2,0.794677734375,ma,4096 +3,0.596435546875,jo,4096 +4,0.700927734375,iq,4096 +5,0.7291321171918187,mr,4096 +6,0.751220703125,dz,4096 +7,0.555419921875,qa,4096 +8,0.6523447090596611,sy,4096 +9,0.6025390625,bh,4096 +10,0.701171875,sa,4096 +11,0.677734375,om,4096 +12,0.7313915857605178,dj,4096 +13,0.714795008912656,so,4096 +14,0.60546875,ly,4096 +15,0.698486328125,ye,4096 +16,0.6628895184135978,td,4096 +17,0.689208984375,tn,4096 +18,0.64501953125,eg,4096 +19,0.70556640625,kw,4096 +20,0.567138671875,ae,4096 +21,0.745849609375,lb,8192 +22,0.645751953125,sd,8192 +23,0.79541015625,ma,8192 +24,0.606201171875,jo,8192 +25,0.71728515625,iq,8192 +26,0.7435046987285793,mr,8192 +27,0.771728515625,dz,8192 +28,0.57470703125,qa,8192 +29,0.6572550945249203,sy,8192 +30,0.625,bh,8192 +31,0.7236328125,sa,8192 +32,0.69189453125,om,8192 +33,0.7313915857605178,dj,8192 +34,0.6987522281639929,so,8192 +35,0.62744140625,ly,8192 +36,0.712158203125,ye,8192 +37,0.6586402266288952,td,8192 +38,0.69091796875,tn,8192 +39,0.664306640625,eg,8192 +40,0.703369140625,kw,8192 +41,0.598876953125,ae,8192 +42,0.75,lb,16384 +43,0.665771484375,sd,16384 +44,0.7978515625,ma,16384 +45,0.60986328125,jo,16384 +46,0.72412109375,iq,16384 +47,0.7600884466556108,mr,16384 +48,0.77880859375,dz,16384 +49,0.583984375,qa,16384 +50,0.6813159833046895,sy,16384 +51,0.626708984375,bh,16384 +52,0.73095703125,sa,16384 +53,0.714111328125,om,16384 +54,0.7313915857605178,dj,16384 +55,0.714795008912656,so,16384 +56,0.65234375,ly,16384 +57,0.72314453125,ye,16384 +58,0.6586402266288952,td,16384 +59,0.7041015625,tn,16384 +60,0.6865234375,eg,16384 +61,0.712158203125,kw,16384 +62,0.58984375,ae,16384 diff --git a/quarto/data/en-recall.csv b/quarto/data/en-recall.csv index 7b8abdd..df2df77 100644 --- a/quarto/data/en-recall.csv +++ b/quarto/data/en-recall.csv @@ -1,676 +1,187 @@ ,Recall,Country,Training Size -0,0.5045813586097946,pr,4096 -1,0.51953125,cr,4096 -2,0.48095703125,tr,4096 -3,0.6049638055842813,fj,4096 -4,0.533203125,tw,4096 -5,0.636962890625,lk,4096 -6,0.673583984375,ca,4096 -7,0.669677734375,om,4096 -8,0.5883155883155883,am,4096 -9,0.56591796875,cl,4096 -10,0.552978515625,sd,4096 -11,0.5084175084175084,bl,4096 -12,0.3800753453327752,ml,4096 -13,0.51513671875,vn,4096 -14,0.49677898909811696,ru,4096 -15,0.5117831893165751,ni,4096 -16,0.7060454797559623,bh,4096 -17,0.570556640625,gt,4096 -18,0.48849557522123893,dj,4096 -19,0.51025390625,th,4096 -20,0.701416015625,ua,4096 -21,0.6525573192239859,fo,4096 -22,0.6825119930222416,bi,4096 -23,0.5267459138187222,pe,4096 -24,0.7099609375,bw,4096 -25,0.56396484375,cy,4096 -26,0.661376953125,cm,4096 -27,0.6433391652086978,bn,4096 -28,0.716552734375,so,4096 -29,0.4937888198757764,mn,4096 -30,0.7244147157190636,im,4096 -31,0.64697265625,tz,4096 -32,0.6923828125,sz,4096 -33,0.5761867296092316,ir,4096 -34,0.40014792899408286,bf,4096 -35,0.5215686274509804,gl,4096 -36,0.504150390625,pl,4096 -37,0.6993842810575879,gd,4096 -38,0.8245810055865922,gg,4096 -39,0.568603515625,be,4096 -40,0.683349609375,bm,4096 -41,0.47101449275362317,mr,4096 -42,0.5336,ai,4096 -43,0.68408203125,vc,4096 -44,0.5450346420323325,cw,4096 -45,0.727294921875,ug,4096 -46,0.6962890625,ag,4096 -47,0.527099609375,do,4096 -48,0.5450662110209312,mk,4096 -49,0.602783203125,az,4096 -50,0.594970703125,ch,4096 -51,0.592944369063772,tj,4096 -52,0.5934065934065934,tm,4096 -53,0.6575846833578792,et,4096 -54,0.4714082503556188,la,4096 -55,0.68212890625,ph,4096 -56,0.603271484375,de,4096 -57,0.533935546875,cz,4096 -58,0.528076171875,hu,4096 -59,0.542236328125,ar,4096 -60,0.45780206435944143,gn,4096 -61,0.7060546875,zw,4096 -62,0.5178571428571429,ga,4096 -63,0.6640625,mv,4096 -64,0.4678787878787879,mc,4096 -65,0.588623046875,jp,4096 -66,0.554046997389034,kg,4096 -67,0.584228515625,ly,4096 -68,0.60107421875,aw,4096 -69,0.664306640625,za,4096 -70,0.669677734375,us,4096 -71,0.507080078125,pt,4096 -72,0.668212890625,bb,4096 -73,0.5960324616771867,sc,4096 -74,0.5724541345386865,ci,4096 -75,0.5609756097560976,tl,4096 -76,0.5667736361392249,bj,4096 -77,0.719482421875,bd,4096 -78,0.507531865585168,ad,4096 -79,0.703857421875,bs,4096 -80,0.56103515625,si,4096 -81,0.52392578125,mx,4096 -82,0.578125,hn,4096 -83,0.447021484375,cu,4096 -84,0.7929307805596465,sa,4096 -85,0.7232704402515723,va,4096 -86,0.5830078125,is,4096 -87,0.545654296875,ee,4096 -88,0.5973831775700934,ht,4096 -89,0.7976151043391851,ws,4096 -90,0.44654088050314467,kp,4096 -91,0.5404371584699453,sy,4096 -92,0.5700757575757576,bq,4096 -93,0.6372492836676218,sx,4096 -94,0.763916015625,in,4096 -95,0.608642578125,bg,4096 -96,0.6123046875,bz,4096 -97,0.599853515625,ge,4096 -98,0.61669921875,iq,4096 -99,0.6434359805510534,mp,4096 -100,0.499755859375,lu,4096 -101,0.6611328125,lr,4096 -102,0.6740713765477058,ye,4096 -103,0.6334913112164297,td,4096 -104,0.48090561920349156,bo,4096 -105,0.45357941834451904,pf,4096 -106,0.514404296875,me,4096 -107,0.616455078125,dk,4096 -108,0.55224609375,es,4096 -109,0.5726802070888093,bt,4096 -110,0.6904296875,ls,4096 -111,0.5218978102189781,ck,4096 -112,0.96923828125,li,4096 -113,0.6671259300082667,af,4096 -114,0.5302734375,sg,4096 -115,0.70703125,gh,4096 -116,0.8224195338512763,to,4096 -117,0.54541015625,hr,4096 -118,0.57666015625,fr,4096 -119,0.71142578125,na,4096 -120,0.586181640625,fi,4096 -121,0.54150390625,gr,4096 -122,0.6700434153400868,pw,4096 -123,0.48745046235138706,cg,4096 -124,0.454345703125,tn,4096 -125,0.53857421875,uy,4096 -126,0.652099609375,xk,4096 -127,0.6728515625,rw,4096 -128,0.5980392156862745,gw,4096 -129,0.733154296875,ng,4096 -130,0.546142578125,br,4096 -131,0.56396484375,qa,4096 -132,0.61181640625,at,4096 -133,0.616519174041298,tg,4096 -134,0.568603515625,mz,4096 -135,0.71630859375,tt,4096 -136,0.675048828125,nz,4096 -137,0.602294921875,eg,4096 -138,0.582275390625,no,4096 -139,0.60302734375,sv,4096 -140,0.6407766990291263,fk,4096 -141,0.4453125,by,4096 -142,0.582100777137127,ve,4096 -143,0.489501953125,kh,4096 -144,0.64208984375,sl,4096 -145,0.603759765625,rs,4096 -146,0.5890968266883645,cv,4096 -147,0.620849609375,sn,4096 -148,0.6279920212765957,gu,4096 -149,0.5299550673989016,pa,4096 -150,0.69921875,ke,4096 -151,0.6865284974093264,aq,4096 -152,0.5341796875,ec,4096 -153,0.5478316326530612,sr,4096 -154,0.832,ki,4096 -155,0.5565610859728507,mh,4096 -156,0.619140625,gy,4096 -157,0.39014373716632444,nc,4096 -158,0.609375,gf,4096 -159,0.5219702338766832,ao,4096 -160,0.4840239640539191,dz,4096 -161,0.52197265625,mu,4096 -162,0.710205078125,pk,4096 -163,0.730712890625,ie,4096 -164,0.61328125,my,4096 -165,0.41253071253071255,mm,4096 -166,0.7409326424870466,je,4096 -167,0.7444589308996089,vu,4096 -168,0.57373046875,kr,4096 -169,0.544189453125,jo,4096 -170,0.5771484375,al,4096 -171,0.53076171875,it,4096 -172,0.681058148240579,kw,4096 -173,0.620849609375,ro,4096 -174,0.524169921875,nl,4096 -175,0.618421052631579,dm,4096 -176,0.6685450819672131,pg,4096 -177,0.49963026867143207,lv,4096 -178,0.509765625,cn,4096 -179,0.7223439211391018,kn,4096 -180,0.5173688100517368,ne,4096 -181,0.5193370165745856,yt,4096 -182,0.565673828125,mt,4096 -183,0.7330595482546202,sh,4096 -184,0.6537267080745341,as,4096 -185,0.18012422360248448,um,4096 -186,0.49072265625,co,4096 -187,0.6015037593984962,fm,4096 -188,0.68359375,gi,4096 -189,0.49230769230769234,re,4096 -190,0.5289724439866083,ba,4096 -191,0.4991869918699187,mf,4096 -192,0.4550709406200736,mo,4096 -193,0.603271484375,ky,4096 -194,0.672607421875,lc,4096 -195,0.5528898582333697,hk,4096 -196,0.6243194192377496,vg,4096 -197,0.5920634920634921,gq,4096 -198,0.6088631984585742,cf,4096 -199,0.506103515625,ma,4096 -200,0.669928245270711,tc,4096 -201,0.5104581673306773,py,4096 -202,0.399169921875,kz,4096 -203,0.6884765625,gm,4096 -204,0.6689453125,au,4096 -205,0.695068359375,zm,4096 -206,0.5498046875,se,4096 -207,0.510938602681722,mg,4096 -208,0.7197265625,mw,4096 -209,0.50390625,id,4096 -210,0.5021119324181627,md,4096 -211,0.7445414847161572,sb,4096 -212,0.63427734375,ae,4096 -213,0.54296875,lb,4096 -214,0.5362068965517242,gp,4096 -215,0.63232421875,il,4096 -216,0.70654296875,gb,4096 -217,0.5830078125,sk,4096 -218,0.44390243902439025,mq,4096 -219,0.5390471512770137,cd,4096 -220,0.67041015625,np,4096 -221,0.390869140625,uz,4096 -222,0.65625,jm,4096 -223,0.6210045662100456,vi,4096 -224,0.531494140625,lt,4096 -225,0.5380726698262244,pr,8192 -226,0.552490234375,cr,8192 -227,0.51904296875,tr,8192 -228,0.6385729058945191,fj,8192 -229,0.562744140625,tw,8192 -230,0.660888671875,lk,8192 -231,0.69140625,ca,8192 -232,0.6796875,om,8192 -233,0.6123396123396123,am,8192 -234,0.577392578125,cl,8192 -235,0.559326171875,sd,8192 -236,0.5286195286195287,bl,8192 -237,0.3909585600669736,ml,8192 -238,0.5458984375,vn,8192 -239,0.5166005946481665,ru,8192 -240,0.5263157894736842,ni,8192 -241,0.704381586245147,bh,8192 -242,0.58447265625,gt,8192 -243,0.5256637168141592,dj,8192 -244,0.537353515625,th,8192 -245,0.716796875,ua,8192 -246,0.6596119929453262,fo,8192 -247,0.691234191016136,bi,8192 -248,0.54210004952947,pe,8192 -249,0.723876953125,bw,8192 -250,0.572998046875,cy,8192 -251,0.6787109375,cm,8192 -252,0.6510872281929517,bn,8192 -253,0.733642578125,so,8192 -254,0.4956521739130435,mn,8192 -255,0.7438127090301003,im,8192 -256,0.6728515625,tz,8192 -257,0.708740234375,sz,8192 -258,0.6260162601626016,ir,8192 -259,0.4238165680473373,bf,8192 -260,0.5333333333333333,gl,8192 -261,0.5107421875,pl,8192 -262,0.7102499094530967,gd,8192 -263,0.8279329608938547,gg,8192 -264,0.592041015625,be,8192 -265,0.705810546875,bm,8192 -266,0.5126811594202898,mr,8192 -267,0.5744,ai,8192 -268,0.6884765625,vc,8192 -269,0.5640877598152425,cw,8192 -270,0.751708984375,ug,8192 -271,0.7041015625,ag,8192 -272,0.5458984375,do,8192 -273,0.5655702691157625,mk,8192 -274,0.632568359375,az,8192 -275,0.61474609375,ch,8192 -276,0.621438263229308,tj,8192 -277,0.5989010989010989,tm,8192 -278,0.6703485517918507,et,8192 -279,0.5095305832147937,la,8192 -280,0.694091796875,ph,8192 -281,0.611328125,de,8192 -282,0.5390625,cz,8192 -283,0.54638671875,hu,8192 -284,0.57421875,ar,8192 -285,0.5009107468123861,gn,8192 -286,0.72900390625,zw,8192 -287,0.5293367346938775,ga,8192 -288,0.679931640625,mv,8192 -289,0.48424242424242425,mc,8192 -290,0.600830078125,jp,8192 -291,0.5754569190600523,kg,8192 -292,0.6083984375,ly,8192 -293,0.64013671875,aw,8192 -294,0.68798828125,za,8192 -295,0.684814453125,us,8192 -296,0.54345703125,pt,8192 -297,0.687255859375,bb,8192 -298,0.6095581605049594,sc,8192 -299,0.59611805370912,ci,8192 -300,0.5420054200542005,tl,8192 -301,0.6028141199703777,bj,8192 -302,0.7421875,bd,8192 -303,0.5422943221320974,ad,8192 -304,0.7041015625,bs,8192 -305,0.580078125,si,8192 -306,0.5517578125,mx,8192 -307,0.60302734375,hn,8192 -308,0.476318359375,cu,8192 -309,0.8002945508100148,sa,8192 -310,0.7371069182389937,va,8192 -311,0.60595703125,is,8192 -312,0.554931640625,ee,8192 -313,0.6198130841121495,ht,8192 -314,0.8082146406094733,ws,8192 -315,0.4779874213836478,kp,8192 -316,0.5647540983606557,sy,8192 -317,0.5795454545454546,bq,8192 -318,0.6498567335243552,sx,8192 -319,0.779052734375,in,8192 -320,0.62451171875,bg,8192 -321,0.6318359375,bz,8192 -322,0.6162109375,ge,8192 -323,0.638671875,iq,8192 -324,0.6969205834683955,mp,8192 -325,0.527099609375,lu,8192 -326,0.6923828125,lr,8192 -327,0.6922796795338675,ye,8192 -328,0.6429699842022117,td,8192 -329,0.4896344789961811,bo,8192 -330,0.47315436241610737,pf,8192 -331,0.52490234375,me,8192 -332,0.626953125,dk,8192 -333,0.56103515625,es,8192 -334,0.5818399044205496,bt,8192 -335,0.711669921875,ls,8192 -336,0.5255474452554745,ck,8192 -337,0.9697265625,li,8192 -338,0.6960595205290714,af,8192 -339,0.5419921875,sg,8192 -340,0.725341796875,gh,8192 -341,0.8213096559378469,to,8192 -342,0.577392578125,hr,8192 -343,0.578369140625,fr,8192 -344,0.73388671875,na,8192 -345,0.5966796875,fi,8192 -346,0.55029296875,gr,8192 -347,0.6483357452966715,pw,8192 -348,0.5376486129458389,cg,8192 -349,0.462890625,tn,8192 -350,0.55419921875,uy,8192 -351,0.66796875,xk,8192 -352,0.698486328125,rw,8192 -353,0.5980392156862745,gw,8192 -354,0.747802734375,ng,8192 -355,0.572021484375,br,8192 -356,0.57275390625,qa,8192 -357,0.621826171875,at,8192 -358,0.6456489675516224,tg,8192 -359,0.57470703125,mz,8192 -360,0.736083984375,tt,8192 -361,0.69140625,nz,8192 -362,0.606201171875,eg,8192 -363,0.600341796875,no,8192 -364,0.62109375,sv,8192 -365,0.6844660194174758,fk,8192 -366,0.45132211538461536,by,8192 -367,0.6084231637001755,ve,8192 -368,0.52685546875,kh,8192 -369,0.66748046875,sl,8192 -370,0.623046875,rs,8192 -371,0.6074043938161107,cv,8192 -372,0.63818359375,sn,8192 -373,0.6319813829787234,gu,8192 -374,0.54618072890664,pa,8192 -375,0.72607421875,ke,8192 -376,0.6968911917098446,aq,8192 -377,0.5546875,ec,8192 -378,0.5829081632653061,sr,8192 -379,0.832,ki,8192 -380,0.579185520361991,mh,8192 -381,0.62646484375,gy,8192 -382,0.4209445585215606,nc,8192 -383,0.6145833333333334,gf,8192 -384,0.5347271438695961,ao,8192 -385,0.5022466300549177,dz,8192 -386,0.548095703125,mu,8192 -387,0.73046875,pk,8192 -388,0.743408203125,ie,8192 -389,0.6162109375,my,8192 -390,0.43316953316953316,mm,8192 -391,0.7409326424870466,je,8192 -392,0.7535853976531942,vu,8192 -393,0.58056640625,kr,8192 -394,0.554931640625,jo,8192 -395,0.603271484375,al,8192 -396,0.53662109375,it,8192 -397,0.6932867481906664,kw,8192 -398,0.623046875,ro,8192 -399,0.55029296875,nl,8192 -400,0.6491228070175439,dm,8192 -401,0.6944159836065574,pg,8192 -402,0.5178703475474489,lv,8192 -403,0.550537109375,cn,8192 -404,0.7445235487404163,kn,8192 -405,0.5424981522542498,ne,8192 -406,0.5193370165745856,yt,8192 -407,0.576904296875,mt,8192 -408,0.7330595482546202,sh,8192 -409,0.6413043478260869,as,8192 -410,0.2236024844720497,um,8192 -411,0.5068359375,co,8192 -412,0.6165413533834586,fm,8192 -413,0.704833984375,gi,8192 -414,0.5115384615384615,re,8192 -415,0.5366984290497039,ba,8192 -416,0.5121951219512195,mf,8192 -417,0.47871781397792956,mo,8192 -418,0.623291015625,ky,8192 -419,0.680419921875,lc,8192 -420,0.549618320610687,hk,8192 -421,0.6418632788868723,vg,8192 -422,0.546031746031746,gq,8192 -423,0.630057803468208,cf,8192 -424,0.529296875,ma,8192 -425,0.6705805609915199,tc,8192 -426,0.5378486055776892,py,8192 -427,0.42431640625,kz,8192 -428,0.725830078125,gm,8192 -429,0.700927734375,au,8192 -430,0.71240234375,zm,8192 -431,0.56982421875,se,8192 -432,0.5462244177840508,mg,8192 -433,0.737548828125,mw,8192 -434,0.52001953125,id,8192 -435,0.5158394931362197,md,8192 -436,0.7838427947598253,sb,8192 -437,0.666015625,ae,8192 -438,0.576416015625,lb,8192 -439,0.5724137931034483,gp,8192 -440,0.64013671875,il,8192 -441,0.721435546875,gb,8192 -442,0.5869140625,sk,8192 -443,0.4813008130081301,mq,8192 -444,0.5577111984282908,cd,8192 -445,0.67822265625,np,8192 -446,0.411376953125,uz,8192 -447,0.674072265625,jm,8192 -448,0.6529680365296804,vi,8192 -449,0.55419921875,lt,8192 -450,0.5620853080568721,pr,16384 -451,0.568603515625,cr,16384 -452,0.537109375,tr,16384 -453,0.6577042399172699,fj,16384 -454,0.576416015625,tw,16384 -455,0.6708984375,lk,16384 -456,0.694091796875,ca,16384 -457,0.683837890625,om,16384 -458,0.6369096369096369,am,16384 -459,0.59912109375,cl,16384 -460,0.566162109375,sd,16384 -461,0.531986531986532,bl,16384 -462,0.39640016743407286,ml,16384 -463,0.58349609375,vn,16384 -464,0.541625371655104,ru,16384 -465,0.5506677140612726,ni,16384 -466,0.7063227953410982,bh,16384 -467,0.594970703125,gt,16384 -468,0.5380530973451327,dj,16384 -469,0.54443359375,th,16384 -470,0.7353515625,ua,16384 -471,0.6754850088183422,fo,16384 -472,0.6986480593109463,bi,16384 -473,0.5596830113917781,pe,16384 -474,0.73779296875,bw,16384 -475,0.5771484375,cy,16384 -476,0.703369140625,cm,16384 -477,0.6535866033491627,bn,16384 -478,0.753173828125,so,16384 -479,0.5118012422360249,mn,16384 -480,0.7672240802675585,im,16384 -481,0.693115234375,tz,16384 -482,0.732421875,sz,16384 -483,0.6682402307894046,ir,16384 -484,0.4171597633136095,bf,16384 -485,0.5725490196078431,gl,16384 -486,0.538818359375,pl,16384 -487,0.7149583484244839,gd,16384 -488,0.8340782122905028,gg,16384 -489,0.600341796875,be,16384 -490,0.723388671875,bm,16384 -491,0.5108695652173914,mr,16384 -492,0.6032,ai,16384 -493,0.7138671875,vc,16384 -494,0.5923787528868361,cw,16384 -495,0.759033203125,ug,16384 -496,0.7080078125,ag,16384 -497,0.569580078125,do,16384 -498,0.5736864587782998,mk,16384 -499,0.640869140625,az,16384 -500,0.61962890625,ch,16384 -501,0.621438263229308,tj,16384 -502,0.6098901098901099,tm,16384 -503,0.6914580265095729,et,16384 -504,0.534850640113798,la,16384 -505,0.71240234375,ph,16384 -506,0.616943359375,de,16384 -507,0.55859375,cz,16384 -508,0.547119140625,hu,16384 -509,0.5888671875,ar,16384 -510,0.5270188221007893,gn,16384 -511,0.745361328125,zw,16384 -512,0.5599489795918368,ga,16384 -513,0.700439453125,mv,16384 -514,0.5006060606060606,mc,16384 -515,0.615234375,jp,16384 -516,0.5859007832898172,kg,16384 -517,0.6083984375,ly,16384 -518,0.6435546875,aw,16384 -519,0.719482421875,za,16384 -520,0.7119140625,us,16384 -521,0.54248046875,pt,16384 -522,0.705078125,bb,16384 -523,0.6041478809738503,sc,16384 -524,0.6216431800053177,ci,16384 -525,0.5663956639566395,tl,16384 -526,0.6146630461614416,bj,16384 -527,0.757568359375,bd,16384 -528,0.5596755504055619,ad,16384 -529,0.726806640625,bs,16384 -530,0.605712890625,si,16384 -531,0.561767578125,mx,16384 -532,0.620849609375,hn,16384 -533,0.496826171875,cu,16384 -534,0.8156111929307805,sa,16384 -535,0.7509433962264151,va,16384 -536,0.616455078125,is,16384 -537,0.58154296875,ee,16384 -538,0.6328971962616823,ht,16384 -539,0.8254388870486916,ws,16384 -540,0.46540880503144655,kp,16384 -541,0.5931693989071039,sy,16384 -542,0.5965909090909091,bq,16384 -543,0.6653295128939828,sx,16384 -544,0.80029296875,in,16384 -545,0.65673828125,bg,16384 -546,0.645263671875,bz,16384 -547,0.6337890625,ge,16384 -548,0.666015625,iq,16384 -549,0.6742301458670988,mp,16384 -550,0.533203125,lu,16384 -551,0.711181640625,lr,16384 -552,0.6970138383102695,ye,16384 -553,0.6429699842022117,td,16384 -554,0.5220949263502455,bo,16384 -555,0.4748322147651007,pf,16384 -556,0.538330078125,me,16384 -557,0.627197265625,dk,16384 -558,0.581787109375,es,16384 -559,0.6152927120669056,bt,16384 -560,0.7353515625,ls,16384 -561,0.5255474452554745,ck,16384 -562,0.971435546875,li,16384 -563,0.6957839625241113,af,16384 -564,0.55517578125,sg,16384 -565,0.734375,gh,16384 -566,0.8346281908990011,to,16384 -567,0.595947265625,hr,16384 -568,0.594970703125,fr,16384 -569,0.745361328125,na,16384 -570,0.626953125,fi,16384 -571,0.560791015625,gr,16384 -572,0.6483357452966715,pw,16384 -573,0.5429326287978864,cg,16384 -574,0.47705078125,tn,16384 -575,0.57177734375,uy,16384 -576,0.675048828125,xk,16384 -577,0.70703125,rw,16384 -578,0.5980392156862745,gw,16384 -579,0.76025390625,ng,16384 -580,0.58544921875,br,16384 -581,0.572509765625,qa,16384 -582,0.619873046875,at,16384 -583,0.6670353982300885,tg,16384 -584,0.59228515625,mz,16384 -585,0.7451171875,tt,16384 -586,0.700439453125,nz,16384 -587,0.618408203125,eg,16384 -588,0.61474609375,no,16384 -589,0.64013671875,sv,16384 -590,0.691747572815534,fk,16384 -591,0.49759615384615385,by,16384 -592,0.6227124592629731,ve,16384 -593,0.538818359375,kh,16384 -594,0.685546875,sl,16384 -595,0.655029296875,rs,16384 -596,0.5996745321399511,cv,16384 -597,0.6552734375,sn,16384 -598,0.6396276595744681,gu,16384 -599,0.582626060908637,pa,16384 -600,0.737548828125,ke,16384 -601,0.7046632124352331,aq,16384 -602,0.5849609375,ec,16384 -603,0.5975765306122449,sr,16384 -604,0.832,ki,16384 -605,0.6244343891402715,mh,16384 -606,0.63818359375,gy,16384 -607,0.42299794661190965,nc,16384 -608,0.6145833333333334,gf,16384 -609,0.5478384124734231,ao,16384 -610,0.5192211682476285,dz,16384 -611,0.5322265625,mu,16384 -612,0.745849609375,pk,16384 -613,0.747314453125,ie,16384 -614,0.62646484375,my,16384 -615,0.487960687960688,mm,16384 -616,0.772020725388601,je,16384 -617,0.7614080834419817,vu,16384 -618,0.5771484375,kr,16384 -619,0.55810546875,jo,16384 -620,0.624755859375,al,16384 -621,0.557861328125,it,16384 -622,0.6912902420763664,kw,16384 -623,0.644775390625,ro,16384 -624,0.579345703125,nl,16384 -625,0.637719298245614,dm,16384 -626,0.710297131147541,pg,16384 -627,0.5299482376140005,lv,16384 -628,0.5888671875,cn,16384 -629,0.7541073384446878,kn,16384 -630,0.5439763488543976,ne,16384 -631,0.5193370165745856,yt,16384 -632,0.587158203125,mt,16384 -633,0.7330595482546202,sh,16384 -634,0.6366459627329193,as,16384 -635,0.2236024844720497,um,16384 -636,0.52587890625,co,16384 -637,0.6165413533834586,fm,16384 -638,0.72998046875,gi,16384 -639,0.525,re,16384 -640,0.5647695081122843,ba,16384 -641,0.4991869918699187,mf,16384 -642,0.464004203888597,mo,16384 -643,0.634521484375,ky,16384 -644,0.686767578125,lc,16384 -645,0.5823336968375137,hk,16384 -646,0.6430732002419842,vg,16384 -647,0.546031746031746,gq,16384 -648,0.630057803468208,cf,16384 -649,0.54150390625,ma,16384 -650,0.6823222439660795,tc,16384 -651,0.5672310756972112,py,16384 -652,0.44189453125,kz,16384 -653,0.74267578125,gm,16384 -654,0.718505859375,au,16384 -655,0.724609375,zm,16384 -656,0.57958984375,se,16384 -657,0.5702187720536345,mg,16384 -658,0.741943359375,mw,16384 -659,0.548828125,id,16384 -660,0.5242872228088701,md,16384 -661,0.7838427947598253,sb,16384 -662,0.66162109375,ae,16384 -663,0.593994140625,lb,16384 -664,0.5862068965517241,gp,16384 -665,0.662109375,il,16384 -666,0.73486328125,gb,16384 -667,0.599853515625,sk,16384 -668,0.5024390243902439,mq,16384 -669,0.5778487229862476,cd,16384 -670,0.6982421875,np,16384 -671,0.42578125,uz,16384 -672,0.69140625,jm,16384 -673,0.680365296803653,vi,16384 -674,0.562255859375,lt,16384 +0,0.5858324715615305,fj,4096 +1,0.6396484375,ca,4096 +2,0.55810546875,sd,4096 +3,0.635986328125,cm,4096 +4,0.6976588628762542,im,4096 +5,0.656494140625,sz,4096 +6,0.6704092720028975,gd,4096 +7,0.8050279329608938,gg,4096 +8,0.658203125,bm,4096 +9,0.5336,ai,4096 +10,0.663330078125,vc,4096 +11,0.6943359375,ug,4096 +12,0.670654296875,ag,4096 +13,0.696044921875,ph,4096 +14,0.671630859375,zw,4096 +15,0.624755859375,za,4096 +16,0.64794921875,us,4096 +17,0.651611328125,bb,4096 +18,0.690185546875,bs,4096 +19,0.6263610315186247,sx,4096 +20,0.76318359375,in,4096 +21,0.598388671875,bz,4096 +22,0.6515397082658023,mp,4096 +23,0.647705078125,lr,4096 +24,0.6572265625,ls,4096 +25,0.43795620437956206,ck,4096 +26,0.534912109375,sg,4096 +27,0.8057713651498335,to,4096 +28,0.690185546875,na,4096 +29,0.6439942112879884,pw,4096 +30,0.64794921875,rw,4096 +31,0.708984375,ng,4096 +32,0.696533203125,tt,4096 +33,0.616455078125,nz,4096 +34,0.6092233009708737,fk,4096 +35,0.6171875,sl,4096 +36,0.6117021276595744,gu,4096 +37,0.67333984375,ke,4096 +38,0.6015625,gy,4096 +39,0.486328125,mu,4096 +40,0.691650390625,pk,4096 +41,0.7001953125,ie,4096 +42,0.7235984354628422,vu,4096 +43,0.6070175438596491,dm,4096 +44,0.6324282786885246,pg,4096 +45,0.7009857612267251,kn,4096 +46,0.5400390625,mt,4096 +47,0.6950718685831622,sh,4096 +48,0.556390977443609,fm,4096 +49,0.665283203125,gi,4096 +50,0.597412109375,ky,4096 +51,0.656005859375,lc,4096 +52,0.6073805202661827,vg,4096 +53,0.6454664057403784,tc,4096 +54,0.67529296875,gm,4096 +55,0.6396484375,au,4096 +56,0.66455078125,zm,4096 +57,0.67578125,mw,4096 +58,0.7489082969432315,sb,4096 +59,0.681884765625,gb,4096 +60,0.62646484375,jm,4096 +61,0.6118721461187214,vi,4096 +62,0.6168562564632886,fj,8192 +63,0.64794921875,ca,8192 +64,0.565673828125,sd,8192 +65,0.6494140625,cm,8192 +66,0.703010033444816,im,8192 +67,0.667236328125,sz,8192 +68,0.6638898949655921,gd,8192 +69,0.7910614525139665,gg,8192 +70,0.67138671875,bm,8192 +71,0.5464,ai,8192 +72,0.662109375,vc,8192 +73,0.716796875,ug,8192 +74,0.670654296875,ag,8192 +75,0.719482421875,ph,8192 +76,0.695556640625,zw,8192 +77,0.6533203125,za,8192 +78,0.674560546875,us,8192 +79,0.658203125,bb,8192 +80,0.686767578125,bs,8192 +81,0.6372492836676218,sx,8192 +82,0.77734375,in,8192 +83,0.601806640625,bz,8192 +84,0.6482982171799028,mp,8192 +85,0.659423828125,lr,8192 +86,0.676025390625,ls,8192 +87,0.4708029197080292,ck,8192 +88,0.541748046875,sg,8192 +89,0.8113207547169812,to,8192 +90,0.71044921875,na,8192 +91,0.6150506512301013,pw,8192 +92,0.660400390625,rw,8192 +93,0.70947265625,ng,8192 +94,0.7080078125,tt,8192 +95,0.639892578125,nz,8192 +96,0.6577669902912622,fk,8192 +97,0.631591796875,sl,8192 +98,0.5950797872340425,gu,8192 +99,0.68505859375,ke,8192 +100,0.5947265625,gy,8192 +101,0.500244140625,mu,8192 +102,0.715087890625,pk,8192 +103,0.710205078125,ie,8192 +104,0.7444589308996089,vu,8192 +105,0.6228070175438597,dm,8192 +106,0.6567622950819673,pg,8192 +107,0.7146768893756845,kn,8192 +108,0.550537109375,mt,8192 +109,0.6950718685831622,sh,8192 +110,0.5827067669172933,fm,8192 +111,0.684814453125,gi,8192 +112,0.583984375,ky,8192 +113,0.653564453125,lc,8192 +114,0.6079854809437386,vg,8192 +115,0.639921722113503,tc,8192 +116,0.68798828125,gm,8192 +117,0.662109375,au,8192 +118,0.669189453125,zm,8192 +119,0.683349609375,mw,8192 +120,0.7816593886462883,sb,8192 +121,0.700927734375,gb,8192 +122,0.643798828125,jm,8192 +123,0.639269406392694,vi,8192 +124,0.6416752843846949,fj,16384 +125,0.6611328125,ca,16384 +126,0.583984375,sd,16384 +127,0.673828125,cm,16384 +128,0.7384615384615385,im,16384 +129,0.6796875,sz,16384 +130,0.6896052155016299,gd,16384 +131,0.8117318435754189,gg,16384 +132,0.684326171875,bm,16384 +133,0.5792,ai,16384 +134,0.692138671875,vc,16384 +135,0.734375,ug,16384 +136,0.688720703125,ag,16384 +137,0.736328125,ph,16384 +138,0.71044921875,zw,16384 +139,0.679443359375,za,16384 +140,0.693359375,us,16384 +141,0.676025390625,bb,16384 +142,0.70166015625,bs,16384 +143,0.6435530085959885,sx,16384 +144,0.798828125,in,16384 +145,0.62353515625,bz,16384 +146,0.6547811993517018,mp,16384 +147,0.681640625,lr,16384 +148,0.7060546875,ls,16384 +149,0.4708029197080292,ck,16384 +150,0.55859375,sg,16384 +151,0.8190899001109878,to,16384 +152,0.720703125,na,16384 +153,0.6150506512301013,pw,16384 +154,0.67529296875,rw,16384 +155,0.72607421875,ng,16384 +156,0.727783203125,tt,16384 +157,0.65625,nz,16384 +158,0.662621359223301,fk,16384 +159,0.658447265625,sl,16384 +160,0.6123670212765957,gu,16384 +161,0.70849609375,ke,16384 +162,0.614501953125,gy,16384 +163,0.505859375,mu,16384 +164,0.7333984375,pk,16384 +165,0.7314453125,ie,16384 +166,0.7496740547588006,vu,16384 +167,0.6464912280701754,dm,16384 +168,0.6703381147540983,pg,16384 +169,0.7332968236582694,kn,16384 +170,0.568359375,mt,16384 +171,0.6950718685831622,sh,16384 +172,0.5827067669172933,fm,16384 +173,0.700439453125,gi,16384 +174,0.599609375,ky,16384 +175,0.675537109375,lc,16384 +176,0.6225045372050817,vg,16384 +177,0.6650358773646445,tc,16384 +178,0.716552734375,gm,16384 +179,0.682373046875,au,16384 +180,0.6904296875,zm,16384 +181,0.710205078125,mw,16384 +182,0.7816593886462883,sb,16384 +183,0.711181640625,gb,16384 +184,0.66162109375,jm,16384 +185,0.6666666666666666,vi,16384 From 6d6de8b6729ed3bcd2427de0073440ae788b1c44 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 29 May 2024 05:40:45 -0600 Subject: [PATCH 08/15] French --- quarto/data/fr-recall.csv | 372 ++++++++------------------------------ 1 file changed, 78 insertions(+), 294 deletions(-) diff --git a/quarto/data/fr-recall.csv b/quarto/data/fr-recall.csv index 2bb3383..4e2dcf2 100644 --- a/quarto/data/fr-recall.csv +++ b/quarto/data/fr-recall.csv @@ -1,295 +1,79 @@ ,Recall,Country,Training Size -0,0.5737179487179487,cr,4096 -1,0.5947265625,tr,4096 -2,0.642822265625,ca,4096 -3,0.6341059602649006,cl,4096 -4,0.66552734375,ml,4096 -5,0.6710680751173709,vn,4096 -6,0.5572868927589367,ru,4096 -7,0.868824531516184,bh,4096 -8,0.6123269316659223,dj,4096 -9,0.49204795693662834,th,4096 -10,0.7342007434944238,ua,4096 -11,0.799560546875,bi,4096 -12,0.5862068965517241,pe,4096 -13,0.503448275862069,cy,4096 -14,0.72119140625,cm,4096 -15,0.932373046875,tz,4096 -16,0.746337890625,bf,4096 -17,0.5247831474597274,pl,4096 -18,0.60107421875,be,4096 -19,0.6066619418851878,mr,4096 -20,0.7201492537313433,ug,4096 -21,0.5749851807943094,do,4096 -22,0.56298828125,ch,4096 -23,0.594478527607362,ph,4096 -24,0.547607421875,de,4096 -25,0.5083916083916084,cz,4096 -26,0.6036240090600227,hu,4096 -27,0.6712590891695369,ar,4096 -28,0.693359375,gn,4096 -29,0.6865234375,ga,4096 -30,0.671630859375,mc,4096 -31,0.59912109375,jp,4096 -32,0.550537109375,pt,4096 -33,0.62255859375,us,4096 -34,0.7193390136844823,za,4096 -35,0.637451171875,ci,4096 -36,0.699462890625,bj,4096 -37,0.652587890625,mx,4096 -38,0.666824644549763,sa,4096 -39,0.760498046875,ht,4096 -40,0.864501953125,in,4096 -41,0.5588235294117647,bg,4096 -42,0.510498046875,lu,4096 -43,0.716796875,td,4096 -44,0.5362255965292841,pf,4096 -45,0.5601415094339622,dk,4096 -46,0.595703125,es,4096 -47,0.4388888888888889,sg,4096 -48,0.6819693945442449,gh,4096 -49,0.6061120543293718,hr,4096 -50,0.651123046875,fr,4096 -51,0.586104513064133,fi,4096 -52,0.5758799769186382,gr,4096 -53,0.68017578125,cg,4096 -54,0.585205078125,tn,4096 -55,0.7430025445292621,rw,4096 -56,0.7063136456211813,ng,4096 -57,0.660888671875,br,4096 -58,0.5917901938426454,at,4096 -59,0.70068359375,tg,4096 -60,0.6364605543710021,nz,4096 -61,0.5468901063486948,eg,4096 -62,0.5294117647058824,no,4096 -63,0.7419786096256684,rs,4096 -64,0.691650390625,sn,4096 -65,0.6538849646821393,ke,4096 -66,0.4931640625,gf,4096 -67,0.5708454810495627,nc,4096 -68,0.66259765625,dz,4096 -69,0.5388040712468194,mu,4096 -70,0.6823425022182786,pk,4096 -71,0.6040022547914318,ie,4096 -72,0.5731303772336201,my,4096 -73,0.7452550415183867,kr,4096 -74,0.578125,it,4096 -75,0.5145601921344941,yt,4096 -76,0.701171875,ne,4096 -77,0.58154296875,nl,4096 -78,0.5672043010752689,ro,4096 -79,0.5795363709032774,cn,4096 -80,0.5798429319371727,mt,4096 -81,0.6828978622327792,co,4096 -82,0.56640625,re,4096 -83,0.5099403578528827,hk,4096 -84,0.2942008486562942,cf,4096 -85,0.564697265625,ma,4096 -86,0.50146484375,au,4096 -87,0.6356962025316456,se,4096 -88,0.6519661889011393,mg,4096 -89,0.6072035338090384,id,4096 -90,0.6733870967741935,km,4096 -91,0.6103041929295697,ae,4096 -92,0.699009900990099,lb,4096 -93,0.63037109375,gp,4096 -94,0.6027801179443977,il,4096 -95,0.597900390625,gb,4096 -96,0.62939453125,mq,4096 -97,0.79736328125,cd,4096 -98,0.5737179487179487,cr,8192 -99,0.593017578125,tr,8192 -100,0.65966796875,ca,8192 -101,0.6341059602649006,cl,8192 -102,0.6884765625,ml,8192 -103,0.6830985915492958,vn,8192 -104,0.5536205316223648,ru,8192 -105,0.868824531516184,bh,8192 -106,0.6252791424743189,dj,8192 -107,0.5113775385368241,th,8192 -108,0.7342007434944238,ua,8192 -109,0.81005859375,bi,8192 -110,0.5862068965517241,pe,8192 -111,0.503448275862069,cy,8192 -112,0.7490234375,cm,8192 -113,0.93310546875,tz,8192 -114,0.766357421875,bf,8192 -115,0.5297397769516728,pl,8192 -116,0.59375,be,8192 -117,0.6059532246633593,mr,8192 -118,0.7276119402985075,ug,8192 -119,0.5862477771191464,do,8192 -120,0.5654296875,ch,8192 -121,0.5901840490797546,ph,8192 -122,0.56884765625,de,8192 -123,0.5083916083916084,cz,8192 -124,0.6036240090600227,hu,8192 -125,0.6689628779181018,ar,8192 -126,0.71826171875,gn,8192 -127,0.70849609375,ga,8192 -128,0.6787109375,mc,8192 -129,0.615478515625,jp,8192 -130,0.5546875,pt,8192 -131,0.638671875,us,8192 -132,0.7348308804544281,za,8192 -133,0.649658203125,ci,8192 -134,0.718994140625,bj,8192 -135,0.64306640625,mx,8192 -136,0.6781990521327014,sa,8192 -137,0.790771484375,ht,8192 -138,0.870361328125,in,8192 -139,0.5588235294117647,bg,8192 -140,0.51806640625,lu,8192 -141,0.733154296875,td,8192 -142,0.5535791757049892,pf,8192 -143,0.5601415094339622,dk,8192 -144,0.6044921875,es,8192 -145,0.4388888888888889,sg,8192 -146,0.6833000665335994,gh,8192 -147,0.6196943972835314,hr,8192 -148,0.659912109375,fr,8192 -149,0.5926365795724465,fi,8192 -150,0.5920369301788806,gr,8192 -151,0.690673828125,cg,8192 -152,0.58349609375,tn,8192 -153,0.7393675027262814,rw,8192 -154,0.709572301425662,ng,8192 -155,0.677978515625,br,8192 -156,0.5917901938426454,at,8192 -157,0.71923828125,tg,8192 -158,0.6364605543710021,nz,8192 -159,0.5472123751208507,eg,8192 -160,0.5294117647058824,no,8192 -161,0.7486631016042781,rs,8192 -162,0.71337890625,sn,8192 -163,0.6564076690211907,ke,8192 -164,0.523681640625,gf,8192 -165,0.5527696793002915,nc,8192 -166,0.6689453125,dz,8192 -167,0.5388040712468194,mu,8192 -168,0.6823425022182786,pk,8192 -169,0.608793686583991,ie,8192 -170,0.5731303772336201,my,8192 -171,0.7523724792408066,kr,8192 -172,0.585693359375,it,8192 -173,0.526868808165716,yt,8192 -174,0.70751953125,ne,8192 -175,0.595703125,nl,8192 -176,0.5672043010752689,ro,8192 -177,0.5851318944844125,cn,8192 -178,0.574607329842932,mt,8192 -179,0.6980403800475059,co,8192 -180,0.577880859375,re,8192 -181,0.5099403578528827,hk,8192 -182,0.34276284771334276,cf,8192 -183,0.588623046875,ma,8192 -184,0.49853515625,au,8192 -185,0.64,se,8192 -186,0.6626240352811467,mg,8192 -187,0.6153584777437988,id,8192 -188,0.6745391705069125,km,8192 -189,0.619073718827076,ae,8192 -190,0.7141914191419142,lb,8192 -191,0.637451171875,gp,8192 -192,0.6246840775063185,il,8192 -193,0.604248046875,gb,8192 -194,0.63818359375,mq,8192 -195,0.811767578125,cd,8192 -196,0.5737179487179487,cr,16384 -197,0.599609375,tr,16384 -198,0.67724609375,ca,16384 -199,0.6341059602649006,cl,16384 -200,0.714111328125,ml,16384 -201,0.6830985915492958,vn,16384 -202,0.5756186984417965,ru,16384 -203,0.868824531516184,bh,16384 -204,0.6252791424743189,dj,16384 -205,0.5113775385368241,th,16384 -206,0.7342007434944238,ua,16384 -207,0.80810546875,bi,16384 -208,0.5862068965517241,pe,16384 -209,0.503448275862069,cy,16384 -210,0.771728515625,cm,16384 -211,0.935791015625,tz,16384 -212,0.786376953125,bf,16384 -213,0.5297397769516728,pl,16384 -214,0.615966796875,be,16384 -215,0.6059532246633593,mr,16384 -216,0.7276119402985075,ug,16384 -217,0.5862477771191464,do,16384 -218,0.575439453125,ch,16384 -219,0.5901840490797546,ph,16384 -220,0.57275390625,de,16384 -221,0.5083916083916084,cz,16384 -222,0.6036240090600227,hu,16384 -223,0.6689628779181018,ar,16384 -224,0.732177734375,gn,16384 -225,0.722412109375,ga,16384 -226,0.6962890625,mc,16384 -227,0.6259765625,jp,16384 -228,0.569091796875,pt,16384 -229,0.651123046875,us,16384 -230,0.7526465272398657,za,16384 -231,0.66845703125,ci,16384 -232,0.743408203125,bj,16384 -233,0.64306640625,mx,16384 -234,0.6781990521327014,sa,16384 -235,0.803955078125,ht,16384 -236,0.871337890625,in,16384 -237,0.5588235294117647,bg,16384 -238,0.5322265625,lu,16384 -239,0.7548828125,td,16384 -240,0.5535791757049892,pf,16384 -241,0.5601415094339622,dk,16384 -242,0.60595703125,es,16384 -243,0.4388888888888889,sg,16384 -244,0.6833000665335994,gh,16384 -245,0.6196943972835314,hr,16384 -246,0.679443359375,fr,16384 -247,0.5926365795724465,fi,16384 -248,0.6162723600692441,gr,16384 -249,0.708251953125,cg,16384 -250,0.591552734375,tn,16384 -251,0.7393675027262814,rw,16384 -252,0.709572301425662,ng,16384 -253,0.68408203125,br,16384 -254,0.5917901938426454,at,16384 -255,0.740478515625,tg,16384 -256,0.6364605543710021,nz,16384 -257,0.5552690944247503,eg,16384 -258,0.5294117647058824,no,16384 -259,0.7486631016042781,rs,16384 -260,0.7265625,sn,16384 -261,0.6564076690211907,ke,16384 -262,0.538818359375,gf,16384 -263,0.5527696793002915,nc,16384 -264,0.676025390625,dz,16384 -265,0.5388040712468194,mu,16384 -266,0.6823425022182786,pk,16384 -267,0.608793686583991,ie,16384 -268,0.5731303772336201,my,16384 -269,0.7544483985765125,kr,16384 -270,0.58154296875,it,16384 -271,0.5208646052236565,yt,16384 -272,0.734375,ne,16384 -273,0.611572265625,nl,16384 -274,0.5672043010752689,ro,16384 -275,0.5851318944844125,cn,16384 -276,0.574607329842932,mt,16384 -277,0.6980403800475059,co,16384 -278,0.589111328125,re,16384 -279,0.5099403578528827,hk,16384 -280,0.3785950023573786,cf,16384 -281,0.589111328125,ma,16384 -282,0.5166015625,au,16384 -283,0.6430379746835443,se,16384 -284,0.6703417861080485,mg,16384 -285,0.6153584777437988,id,16384 -286,0.6745391705069125,km,16384 -287,0.6100301452452727,ae,16384 -288,0.7072607260726073,lb,16384 -289,0.64990234375,gp,16384 -290,0.628475147430497,il,16384 -291,0.610595703125,gb,16384 -292,0.64208984375,mq,16384 -293,0.82470703125,cd,16384 +0,0.640625,ca,4096 +1,0.6316225165562914,cl,4096 +2,0.67333984375,ml,4096 +3,0.6127735596248325,dj,4096 +4,0.72412109375,cm,4096 +5,0.745849609375,bf,4096 +6,0.600830078125,be,4096 +7,0.5625,ch,4096 +8,0.69677734375,gn,4096 +9,0.6865234375,ga,4096 +10,0.6591796875,mc,4096 +11,0.70263671875,bj,4096 +12,0.760009765625,ht,4096 +13,0.504638671875,lu,4096 +14,0.71484375,td,4096 +15,0.5236442516268981,pf,4096 +16,0.66357421875,fr,4096 +17,0.67822265625,cg,4096 +18,0.7306434023991276,rw,4096 +19,0.7060546875,tg,4096 +20,0.695068359375,sn,4096 +21,0.5434402332361516,nc,4096 +22,0.691162109375,ne,4096 +23,0.3102310231023102,cf,4096 +24,0.7004608294930875,km,4096 +25,0.79736328125,cd,4096 +26,0.657958984375,ca,8192 +27,0.6316225165562914,cl,8192 +28,0.691162109375,ml,8192 +29,0.6239392585975883,dj,8192 +30,0.750732421875,cm,8192 +31,0.762939453125,bf,8192 +32,0.593994140625,be,8192 +33,0.5556640625,ch,8192 +34,0.713623046875,gn,8192 +35,0.70849609375,ga,8192 +36,0.6728515625,mc,8192 +37,0.7158203125,bj,8192 +38,0.78125,ht,8192 +39,0.5146484375,lu,8192 +40,0.72900390625,td,8192 +41,0.5336225596529284,pf,8192 +42,0.677978515625,fr,8192 +43,0.695068359375,cg,8192 +44,0.7310069065794257,rw,8192 +45,0.725830078125,tg,8192 +46,0.7138671875,sn,8192 +47,0.5521865889212828,nc,8192 +48,0.71142578125,ne,8192 +49,0.3606789250353607,cf,8192 +50,0.679147465437788,km,8192 +51,0.8134765625,cd,8192 +52,0.67333984375,ca,16384 +53,0.6316225165562914,cl,16384 +54,0.710205078125,ml,16384 +55,0.6239392585975883,dj,16384 +56,0.77197265625,cm,16384 +57,0.78564453125,bf,16384 +58,0.60205078125,be,16384 +59,0.560302734375,ch,16384 +60,0.738525390625,gn,16384 +61,0.722412109375,ga,16384 +62,0.68017578125,mc,16384 +63,0.74609375,bj,16384 +64,0.80712890625,ht,16384 +65,0.53857421875,lu,16384 +66,0.752685546875,td,16384 +67,0.5336225596529284,pf,16384 +68,0.70068359375,fr,16384 +69,0.7119140625,cg,16384 +70,0.7310069065794257,rw,16384 +71,0.744140625,tg,16384 +72,0.727783203125,sn,16384 +73,0.5521865889212828,nc,16384 +74,0.7353515625,ne,16384 +75,0.3866100895803866,cf,16384 +76,0.679147465437788,km,16384 +77,0.837890625,cd,16384 From 5921c808e59061bc70afd7752c6861ac9f46f649 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 29 May 2024 06:54:25 -0500 Subject: [PATCH 09/15] Hindi, Indonesian, Italian, Japanese, Korean, Dutch, Poish --- dialectid/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dialectid/utils.py b/dialectid/utils.py index 13bdc78..986ec73 100644 --- a/dialectid/utils.py +++ b/dialectid/utils.py @@ -61,6 +61,7 @@ 'so', 'sd', 'sy', # Somalia, Sudan, Syria 'tn', 'ae', 'ye' # Tunisia, United Arab Emirates, Yemen ], + ca=['es'], # Spain de=['at', 'de', 'ch'], # Austria, Germany, Switzerland fr=['be', 'bj', 'bf', # Belgium, Benin, Burkina Faso 'cm', 'ca', 'cf', # Cameroon, Canada, Central African Republic @@ -72,4 +73,11 @@ 'ne', 'rw', 'sn', # Niger, Rwanda, Senegal 'ch', 'tg' # Switzerland, Togo ] + hi=['in'], # India + in=['id'], # Indonesia + it=['it'], # Italy + ja=['jp'], # Japan + ko=['kr'], # Korea + nl=['be', 'nl'], # Belgium, Netherlands + pl=['pl'] # Poland ) From e168a7fb6c88e3bcebad03b49b822d8e960711bc Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 29 May 2024 10:44:08 -0500 Subject: [PATCH 10/15] Portuguese, Russian, Tagalog, Turkish, Chinese --- dialectid/tests/test_utils.py | 15 ++++ dialectid/utils.py | 130 +++++++++++++++++++--------------- 2 files changed, 86 insertions(+), 59 deletions(-) diff --git a/dialectid/tests/test_utils.py b/dialectid/tests/test_utils.py index f6c68c5..5620daa 100644 --- a/dialectid/tests/test_utils.py +++ b/dialectid/tests/test_utils.py @@ -35,3 +35,18 @@ def test_countries(): assert 'ye' in ar and 'so' in ar de = utils.COUNTRIES['de'] assert 'de' in de and 'ch' in de + for lang in ['ca', 'hi', 'in', + 'it', 'ja', 'ko', + 'pl', 'tl']: + assert lang in utils.COUNTRIES + _ = utils.COUNTRIES[lang] + assert len(_) == 1 + pt = utils.COUNTRIES['pt'] + assert 'br' in pt and 'pt' in pt + ru = utils.COUNTRIES['ru'] + assert 'ru' in ru and 'kz' in ru + tr = utils.COUNTRIES['tr'] + assert 'cy' in tr + zh = utils.COUNTRIES['zh'] + assert 'cn' in zh and 'tw' in zh + diff --git a/dialectid/utils.py b/dialectid/utils.py index 986ec73..c72f6a4 100644 --- a/dialectid/utils.py +++ b/dialectid/utils.py @@ -22,62 +22,74 @@ # https://www.cia.gov/the-world-factbook/about/archives/2021/field/languages/ -COUNTRIES = dict(es=['mx', 'cl', 'es', # Mexico (MX), Chile (CL), Spain (ES) - 'ar', 'co', 'pe', # Argentina (AR), Colombia (CO), Peru (PE) - 've', 'do', 'py', # Venezuela (VE), Dominican Republic (DO), Paraguay (PY) - 'ec', 'uy', 'cr', # Ecuador (EC), Uruguay (UY), Costa Rica (CR) - 'sv', 'pa', 'gt', # El Salvador (SV), Panama (PA), Guatemala (GT) - 'hn', 'ni', 'bo', # Honduras (HN), Nicaragua (NI), Bolivia (BO) - 'cu' # Cuba (CU) - ], - en=['ai', 'ag', 'au', # Anguilla, Antigua and Barbuda, Australia - 'bs', 'bb', 'bz', # Bahamas, Barbados, Belize - 'bm', 'vg', 'cm', # Bermuda, British Virgin Islands, Cameroon - 'ca', 'ky', 'ck', # Canada, Cayman Islands, Cook Islands - 'dm', 'sz', 'fk', # Dominica, Eswatini, Falkland Islands - 'fj', 'gm', 'gz', # Fiji, Gambia, Ghana - 'gi', 'gd', 'gu', # Gibraltar, Grenada, Guam - 'gg', 'gy', 'in', # Guernsey, Guyana, India - 'ie', 'im', 'jm', # Ireland, Isle of Man, Jamaica - 'ke', 'ls', 'lr', # Kenya, Lesotho, Liberia - 'mw', 'mt', 'mu', # Malawi, Malta, Mauritius - 'fm', 'na', 'nz', # Micronesia, Namibia, New Zealand - 'ng', 'mp', 'pk', # Nigeria, Northern Mariana Islands, Pakistan - 'pw', 'pg', 'ph', # Palau, Papua New Guinea, Philippines - 'rw', 'sh', 'kn', # Rwanda, Saint Helena, Ascension, and Tristan da Cunha, Saint Kitts and Nevis - 'lc', 'vc', 'sl', # Saint Lucia, Saint Vincent and the Grenadines, Sierra Leone - 'sg', 'sx', 'sb', # Singapore, Sint Maarten, Solomon Islands - 'za', 'sd', 'to', # South Africa, Sudan, Tonga - 'tt', 'tc', 'ug', # Trinidad y Tobago, Turks and Caicos Islands, Uganda - 'gb', 'us', 'vu', # United Kingdom, United States, Vanuatu - 'vg', 'vi', 'zm', # Virgin Islands (GB), Virgin Islands (US), Zambia - 'zw' # Zimbabwe - ], - ar=['dz', 'bh', 'td', # Algeria, Bahrain, Chad - 'dj', 'eg', 'iq', # Djibouti, Egypt, Iraq - 'jo', 'kw', 'lb', # Jordan, Kuwait, Lebanon, - 'ly', 'mr', 'ma', # Libya, Mauritania, Morocco - 'om', 'qa', 'sa', # Oman, Qatar, Saudi Arabia - 'so', 'sd', 'sy', # Somalia, Sudan, Syria - 'tn', 'ae', 'ye' # Tunisia, United Arab Emirates, Yemen - ], - ca=['es'], # Spain - de=['at', 'de', 'ch'], # Austria, Germany, Switzerland - fr=['be', 'bj', 'bf', # Belgium, Benin, Burkina Faso - 'cm', 'ca', 'cf', # Cameroon, Canada, Central African Republic - 'td', 'km', 'cd', # Chad, Comoros, Congo (Republic) - 'cg', 'cl', 'dj', # Congo, Cote d'lvoire, Djibouti - 'fr', 'pf', 'ga', # France, French Polynesia, Gabon - 'gn', 'ht', 'lu', # Guinea, Haiti, Luxembourg - 'ml', 'mc', 'nc', # Mali, Monaco, New Caledonia - 'ne', 'rw', 'sn', # Niger, Rwanda, Senegal - 'ch', 'tg' # Switzerland, Togo - ] - hi=['in'], # India - in=['id'], # Indonesia - it=['it'], # Italy - ja=['jp'], # Japan - ko=['kr'], # Korea - nl=['be', 'nl'], # Belgium, Netherlands - pl=['pl'] # Poland - ) +COUNTRIES = {'es':['mx', 'cl', 'es', # Mexico (MX), Chile (CL), Spain (ES) + 'ar', 'co', 'pe', # Argentina (AR), Colombia (CO), Peru (PE) + 've', 'do', 'py', # Venezuela (VE), Dominican Republic (DO), Paraguay (PY) + 'ec', 'uy', 'cr', # Ecuador (EC), Uruguay (UY), Costa Rica (CR) + 'sv', 'pa', 'gt', # El Salvador (SV), Panama (PA), Guatemala (GT) + 'hn', 'ni', 'bo', # Honduras (HN), Nicaragua (NI), Bolivia (BO) + 'cu' 'gq', # Cuba (CU), Equatorial Guinea + ], + 'en':['ai', 'ag', 'au', # Anguilla, Antigua and Barbuda, Australia + 'bs', 'bb', 'bz', # Bahamas, Barbados, Belize + 'bm', 'vg', 'cm', # Bermuda, British Virgin Islands, Cameroon + 'ca', 'ky', 'ck', # Canada, Cayman Islands, Cook Islands + 'dm', 'sz', 'fk', # Dominica, Eswatini, Falkland Islands + 'fj', 'gm', 'gz', # Fiji, Gambia, Ghana + 'gi', 'gd', 'gu', # Gibraltar, Grenada, Guam + 'gg', 'gy', 'in', # Guernsey, Guyana, India + 'ie', 'im', 'jm', # Ireland, Isle of Man, Jamaica + 'ke', 'ls', 'lr', # Kenya, Lesotho, Liberia + 'mw', 'mt', 'mu', # Malawi, Malta, Mauritius + 'fm', 'na', 'nz', # Micronesia, Namibia, New Zealand + 'ng', 'mp', 'pk', # Nigeria, Northern Mariana Islands, Pakistan + 'pw', 'pg', 'ph', # Palau, Papua New Guinea, Philippines + 'rw', 'sh', 'kn', # Rwanda, Saint Helena, Ascension, and Tristan da Cunha, Saint Kitts and Nevis + 'lc', 'vc', 'sl', # Saint Lucia, Saint Vincent and the Grenadines, Sierra Leone + 'sg', 'sx', 'sb', # Singapore, Sint Maarten, Solomon Islands + 'za', 'sd', 'to', # South Africa, Sudan, Tonga + 'tt', 'tc', 'ug', # Trinidad y Tobago, Turks and Caicos Islands, Uganda + 'gb', 'us', 'vu', # United Kingdom, United States, Vanuatu + 'vg', 'vi', 'zm', # Virgin Islands (GB), Virgin Islands (US), Zambia + 'zw' # Zimbabwe + ], + 'ar':['dz', 'bh', 'td', # Algeria, Bahrain, Chad + 'dj', 'eg', 'iq', # Djibouti, Egypt, Iraq + 'jo', 'kw', 'lb', # Jordan, Kuwait, Lebanon, + 'ly', 'mr', 'ma', # Libya, Mauritania, Morocco + 'om', 'qa', 'sa', # Oman, Qatar, Saudi Arabia + 'so', 'sd', 'sy', # Somalia, Sudan, Syria + 'tn', 'ae', 'ye' # Tunisia, United Arab Emirates, Yemen + ], + 'ca':['es'], # Spain + 'de':['at', 'de', 'ch'], # Austria, Germany, Switzerland + 'fr':['be', 'bj', 'bf', # Belgium, Benin, Burkina Faso + 'cm', 'ca', 'cf', # Cameroon, Canada, Central African Republic + 'td', 'km', 'cd', # Chad, Comoros, Congo (Republic) + 'cg', 'cl', 'dj', # Congo, Cote d'lvoire, Djibouti + 'fr', 'pf', 'ga', # France, French Polynesia, Gabon + 'gn', 'ht', 'lu', # Guinea, Haiti, Luxembourg + 'ml', 'mc', 'nc', # Mali, Monaco, New Caledonia + 'ne', 'rw', 'sn', # Niger, Rwanda, Senegal + 'ch', 'tg' # Switzerland, Togo + ], + 'hi':['in'], # India + 'in':['id'], # Indonesia + 'it':['it'], # Italy + 'ja':['jp'], # Japan + 'ko':['kr'], # Korea + 'nl':['be', 'nl'], # Belgium, Netherlands + 'pl':['pl'], # Poland + 'pt':['ao', 'br', 'cv', # Angola, Brazil, Cabo Verde + 'mz', 'pt' # Mozambique, Portugal + ], + 'ru':['by', 'kz', 'kg', # Belarus, Kazakhstan, Kyrgyzstan + 'ru' # Russian + ], + 'tl':['ph'], # Philippines + 'tr':['cy', 'tr'], # Cyprus, Turkey + 'zh':['cn', 'sg', 'hk', # China, Singapore, Hong Kong + 'tw' # Taiwan + ] + } + \ No newline at end of file From 0b38ff65f7e2a23c544a5b42c53b7444a7f010f0 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 29 May 2024 11:01:57 -0500 Subject: [PATCH 11/15] Typo in Spanish --- dialectid/tests/test_utils.py | 4 ++++ dialectid/utils.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dialectid/tests/test_utils.py b/dialectid/tests/test_utils.py index 5620daa..c80a21d 100644 --- a/dialectid/tests/test_utils.py +++ b/dialectid/tests/test_utils.py @@ -49,4 +49,8 @@ def test_countries(): assert 'cy' in tr zh = utils.COUNTRIES['zh'] assert 'cn' in zh and 'tw' in zh + for k, v in utils.COUNTRIES.items(): + assert len(k) == 2 + for i in v: + assert len(i) == 2 diff --git a/dialectid/utils.py b/dialectid/utils.py index c72f6a4..26ebf78 100644 --- a/dialectid/utils.py +++ b/dialectid/utils.py @@ -28,7 +28,7 @@ 'ec', 'uy', 'cr', # Ecuador (EC), Uruguay (UY), Costa Rica (CR) 'sv', 'pa', 'gt', # El Salvador (SV), Panama (PA), Guatemala (GT) 'hn', 'ni', 'bo', # Honduras (HN), Nicaragua (NI), Bolivia (BO) - 'cu' 'gq', # Cuba (CU), Equatorial Guinea + 'cu', 'gq', # Cuba (CU), Equatorial Guinea ], 'en':['ai', 'ag', 'au', # Anguilla, Antigua and Barbuda, Australia 'bs', 'bb', 'bz', # Bahamas, Barbados, Belize From 85fdf9c4b65cb62f4a8acd28ea8eca7aa7c9cb57 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 29 May 2024 10:06:16 -0600 Subject: [PATCH 12/15] Portuguese --- quarto/data/pt-recall.csv | 201 +++----------------------------------- quarto/dialectid.qmd | 65 ------------ 2 files changed, 15 insertions(+), 251 deletions(-) diff --git a/quarto/data/pt-recall.csv b/quarto/data/pt-recall.csv index d134485..afb5cb0 100644 --- a/quarto/data/pt-recall.csv +++ b/quarto/data/pt-recall.csv @@ -1,187 +1,16 @@ ,Recall,Country,Training Size -0,0.5904392764857881,cr,4096 -1,0.557861328125,nl,4096 -2,0.5819909128459314,hu,4096 -3,0.618896484375,es,4096 -4,0.615629259427533,tr,4096 -5,0.61962890625,de,4096 -6,0.5271889400921659,cn,4096 -7,0.5980210265924552,cz,4096 -8,0.8043217286914766,ro,4096 -9,0.586669921875,ar,4096 -10,0.5818399044205496,sg,4096 -11,0.5328767123287671,mt,4096 -12,0.585693359375,ca,4096 -13,0.68115234375,co,4096 -14,0.59375,cl,4096 -15,0.7588571428571429,ph,4096 -16,0.5172932330827068,hr,4096 -17,0.56396484375,fr,4096 -18,0.3949416342412451,ru,4096 -19,0.5927734375,jp,4096 -20,0.4992548435171386,th,4096 -21,0.517503805175038,gr,4096 -22,0.5387673956262425,fi,4096 -23,0.647705078125,pt,4096 -24,0.57177734375,us,4096 -25,0.5990271021542738,za,4096 -26,0.560546875,uy,4096 -27,0.6025730180806675,pe,4096 -28,0.9156756756756756,ng,4096 -29,0.635498046875,br,4096 -30,0.69970703125,mx,4096 -31,0.5341530054644809,at,4096 -32,0.539794921875,py,4096 -33,0.73486328125,mz,4096 -34,0.572509765625,au,4096 -35,0.5319528739735808,nz,4096 -36,0.574951171875,se,4096 -37,0.5993265993265994,eg,4096 -38,0.6681792399319342,id,4096 -39,0.6284032376747608,no,4096 -40,0.5823798627002288,ee,4096 -41,0.958984375,tz,4096 -42,0.8277262180974478,ve,4096 -43,0.7971590909090909,in,4096 -44,0.5333556821991284,ae,4096 -45,0.7654067378800329,cv,4096 -46,0.5724137931034483,pl,4096 -47,0.7404299947561616,pa,4096 -48,0.5562700964630225,ec,4096 -49,0.630113876001687,il,4096 -50,0.521484375,be,4096 -51,0.5162068965517241,lu,4096 -52,0.587158203125,gb,4096 -53,0.76171875,ao,4096 -54,0.28451882845188287,sk,4096 -55,0.7089508002371073,do,4096 -56,0.5455259026687598,bo,4096 -57,0.5849609375,ie,4096 -58,0.570556640625,ch,4096 -59,0.5597798899449725,dk,4096 -60,0.5674967234600262,kr,4096 -61,0.5546875,it,4096 -62,0.5904392764857881,cr,8192 -63,0.568603515625,nl,8192 -64,0.6146220570012392,hu,8192 -65,0.62158203125,es,8192 -66,0.6419809177646524,tr,8192 -67,0.616943359375,de,8192 -68,0.5359447004608295,cn,8192 -69,0.6054421768707483,cz,8192 -70,0.8043217286914766,ro,8192 -71,0.588623046875,ar,8192 -72,0.5818399044205496,sg,8192 -73,0.5328767123287671,mt,8192 -74,0.608642578125,ca,8192 -75,0.697509765625,co,8192 -76,0.599609375,cl,8192 -77,0.7588571428571429,ph,8192 -78,0.5172932330827068,hr,8192 -79,0.55810546875,fr,8192 -80,0.3949416342412451,ru,8192 -81,0.59326171875,jp,8192 -82,0.4992548435171386,th,8192 -83,0.5159817351598174,gr,8192 -84,0.5455268389662028,fi,8192 -85,0.65673828125,pt,8192 -86,0.57373046875,us,8192 -87,0.6004169562195969,za,8192 -88,0.5615234375,uy,8192 -89,0.605702364394993,pe,8192 -90,0.9156756756756756,ng,8192 -91,0.654296875,br,8192 -92,0.710205078125,mx,8192 -93,0.5519125683060109,at,8192 -94,0.546875,py,8192 -95,0.754150390625,mz,8192 -96,0.571044921875,au,8192 -97,0.5319528739735808,nz,8192 -98,0.583984375,se,8192 -99,0.5993265993265994,eg,8192 -100,0.6693136698808848,id,8192 -101,0.6350257542310522,no,8192 -102,0.5823798627002288,ee,8192 -103,0.96142578125,tz,8192 -104,0.8254060324825986,ve,8192 -105,0.8022727272727272,in,8192 -106,0.5457593027153872,ae,8192 -107,0.7814297452752671,cv,8192 -108,0.5681697612732095,pl,8192 -109,0.7503932878867331,pa,8192 -110,0.5572347266881029,ec,8192 -111,0.630113876001687,il,8192 -112,0.50830078125,be,8192 -113,0.51,lu,8192 -114,0.585205078125,gb,8192 -115,0.793212890625,ao,8192 -116,0.28451882845188287,sk,8192 -117,0.7083580320094843,do,8192 -118,0.5451334379905809,bo,8192 -119,0.60595703125,ie,8192 -120,0.57275390625,ch,8192 -121,0.5632816408204102,dk,8192 -122,0.5705548274355614,kr,8192 -123,0.5517578125,it,8192 -124,0.5904392764857881,cr,16384 -125,0.57470703125,nl,16384 -126,0.6146220570012392,hu,16384 -127,0.62158203125,es,16384 -128,0.6419809177646524,tr,16384 -129,0.60498046875,de,16384 -130,0.5359447004608295,cn,16384 -131,0.6054421768707483,cz,16384 -132,0.8043217286914766,ro,16384 -133,0.58056640625,ar,16384 -134,0.5818399044205496,sg,16384 -135,0.5328767123287671,mt,16384 -136,0.60302734375,ca,16384 -137,0.69091796875,co,16384 -138,0.596435546875,cl,16384 -139,0.7588571428571429,ph,16384 -140,0.5172932330827068,hr,16384 -141,0.55859375,fr,16384 -142,0.3949416342412451,ru,16384 -143,0.6005859375,jp,16384 -144,0.4992548435171386,th,16384 -145,0.5159817351598174,gr,16384 -146,0.5455268389662028,fi,16384 -147,0.65966796875,pt,16384 -148,0.574462890625,us,16384 -149,0.6004169562195969,za,16384 -150,0.5673828125,uy,16384 -151,0.6053546592489569,pe,16384 -152,0.9156756756756756,ng,16384 -153,0.6630859375,br,16384 -154,0.720947265625,mx,16384 -155,0.5519125683060109,at,16384 -156,0.54296875,py,16384 -157,0.7626953125,mz,16384 -158,0.564208984375,au,16384 -159,0.5319528739735808,nz,16384 -160,0.577392578125,se,16384 -161,0.5993265993265994,eg,16384 -162,0.6693136698808848,id,16384 -163,0.6350257542310522,no,16384 -164,0.5823798627002288,ee,16384 -165,0.963134765625,tz,16384 -166,0.8254060324825986,ve,16384 -167,0.8002840909090909,in,16384 -168,0.5491116325846463,ae,16384 -169,0.7814297452752671,cv,16384 -170,0.5681697612732095,pl,16384 -171,0.7503932878867331,pa,16384 -172,0.5572347266881029,ec,16384 -173,0.630113876001687,il,16384 -174,0.50927734375,be,16384 -175,0.51,lu,16384 -176,0.588134765625,gb,16384 -177,0.7998046875,ao,16384 -178,0.28451882845188287,sk,16384 -179,0.7083580320094843,do,16384 -180,0.5451334379905809,bo,16384 -181,0.610107421875,ie,16384 -182,0.56884765625,ch,16384 -183,0.5632816408204102,dk,16384 -184,0.5705548274355614,kr,16384 -185,0.544921875,it,16384 +0,0.66064453125,pt,4096 +1,0.751220703125,br,4096 +2,0.73583984375,mz,4096 +3,0.7847165160230074,cv,4096 +4,0.77099609375,ao,4096 +5,0.66162109375,pt,8192 +6,0.762939453125,br,8192 +7,0.7568359375,mz,8192 +8,0.7978635990139687,cv,8192 +9,0.794677734375,ao,8192 +10,0.6669921875,pt,16384 +11,0.778076171875,br,16384 +12,0.770263671875,mz,16384 +13,0.7978635990139687,cv,16384 +14,0.79833984375,ao,16384 diff --git a/quarto/dialectid.qmd b/quarto/dialectid.qmd index 2db3341..5c2624d 100644 --- a/quarto/dialectid.qmd +++ b/quarto/dialectid.qmd @@ -86,45 +86,6 @@ fig = px.bar(df2.astype({'Training Size': str}), fig.show() ``` -```{python} -#| echo: false -#| title: Hindi (hi) -import pandas as pd -df = pd.read_csv('data/hi-recall.csv', index_col=0) -df2 = df.sort_values(by=['Training Size', 'Recall']) -fig = px.bar(df2.astype({'Training Size': str}), - x='Country', y='Recall', - barmode='overlay', - color='Training Size') -fig.show() -``` - -```{python} -#| echo: false -#| title: Indonesian (in) -import pandas as pd -df = pd.read_csv('data/in-recall.csv', index_col=0) -df2 = df.sort_values(by=['Training Size', 'Recall']) -fig = px.bar(df2.astype({'Training Size': str}), - x='Country', y='Recall', - barmode='overlay', - color='Training Size') -fig.show() -``` - -```{python} -#| echo: false -#| title: Italian (it) -import pandas as pd -df = pd.read_csv('data/it-recall.csv', index_col=0) -df2 = df.sort_values(by=['Training Size', 'Recall']) -fig = px.bar(df2.astype({'Training Size': str}), - x='Country', y='Recall', - barmode='overlay', - color='Training Size') -fig.show() -``` - ```{python} #| echo: false #| title: Dutch (nl) @@ -138,19 +99,6 @@ fig = px.bar(df2.astype({'Training Size': str}), fig.show() ``` -```{python} -#| echo: false -#| title: Polish (pl) -import pandas as pd -df = pd.read_csv('data/pl-recall.csv', index_col=0) -df2 = df.sort_values(by=['Training Size', 'Recall']) -fig = px.bar(df2.astype({'Training Size': str}), - x='Country', y='Recall', - barmode='overlay', - color='Training Size') -fig.show() -``` - ```{python} #| echo: false #| title: Portuguese (pt) @@ -177,19 +125,6 @@ fig = px.bar(df2.astype({'Training Size': str}), fig.show() ``` -```{python} -#| echo: false -#| title: Tagalog (tl) -import pandas as pd -df = pd.read_csv('data/tl-recall.csv', index_col=0) -df2 = df.sort_values(by=['Training Size', 'Recall']) -fig = px.bar(df2.astype({'Training Size': str}), - x='Country', y='Recall', - barmode='overlay', - color='Training Size') -fig.show() -``` - ```{python} #| echo: false #| title: Turkish (tr) From f775d45dba1c48bec564dfa1226aceeb96b05a6a Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 29 May 2024 12:01:51 -0600 Subject: [PATCH 13/15] Missing languages --- quarto/data/hi-recall.csv | 7 -- quarto/data/in-recall.csv | 28 ----- quarto/data/it-recall.csv | 7 -- quarto/data/pl-recall.csv | 10 -- quarto/data/ru-recall.csv | 219 +++----------------------------------- quarto/data/tl-recall.csv | 19 ---- quarto/data/tr-recall.csv | 24 ++--- quarto/data/zh-recall.csv | 51 +++------ 8 files changed, 30 insertions(+), 335 deletions(-) delete mode 100644 quarto/data/hi-recall.csv delete mode 100644 quarto/data/in-recall.csv delete mode 100644 quarto/data/it-recall.csv delete mode 100644 quarto/data/pl-recall.csv delete mode 100644 quarto/data/tl-recall.csv diff --git a/quarto/data/hi-recall.csv b/quarto/data/hi-recall.csv deleted file mode 100644 index 37b9d16..0000000 --- a/quarto/data/hi-recall.csv +++ /dev/null @@ -1,7 +0,0 @@ -,Recall,Country,Training Size -0,0.97802734375,in,4096 -1,0.91357421875,pk,4096 -2,0.9853515625,in,8192 -3,0.915771484375,pk,8192 -4,0.9921875,in,16384 -5,0.915771484375,pk,16384 diff --git a/quarto/data/in-recall.csv b/quarto/data/in-recall.csv deleted file mode 100644 index 81b318f..0000000 --- a/quarto/data/in-recall.csv +++ /dev/null @@ -1,28 +0,0 @@ -,Recall,Country,Training Size -0,0.9741620111731844,ke,4096 -1,0.9518324607329843,za,4096 -2,0.7992227979274611,us,4096 -3,0.96142578125,id,4096 -4,0.9866310160427807,tz,4096 -5,0.9782913165266106,pk,4096 -6,0.9753008233058899,ng,4096 -7,0.919677734375,my,4096 -8,0.9777472527472527,in,4096 -9,0.9748603351955307,ke,8192 -10,0.9528795811518325,za,8192 -11,0.7992227979274611,us,8192 -12,0.963623046875,id,8192 -13,0.9914438502673797,tz,8192 -14,0.976890756302521,pk,8192 -15,0.9753008233058899,ng,8192 -16,0.93505859375,my,8192 -17,0.9777472527472527,in,8192 -18,0.9748603351955307,ke,16384 -19,0.9528795811518325,za,16384 -20,0.7992227979274611,us,16384 -21,0.970947265625,id,16384 -22,0.990909090909091,tz,16384 -23,0.976890756302521,pk,16384 -24,0.9753008233058899,ng,16384 -25,0.95166015625,my,16384 -26,0.9774725274725274,in,16384 diff --git a/quarto/data/it-recall.csv b/quarto/data/it-recall.csv deleted file mode 100644 index 72d20cb..0000000 --- a/quarto/data/it-recall.csv +++ /dev/null @@ -1,7 +0,0 @@ -,Recall,Country,Training Size -0,0.714111328125,us,4096 -1,0.941162109375,it,4096 -2,0.714111328125,us,8192 -3,0.982177734375,it,8192 -4,0.714111328125,us,16384 -5,0.992431640625,it,16384 diff --git a/quarto/data/pl-recall.csv b/quarto/data/pl-recall.csv deleted file mode 100644 index c5745d2..0000000 --- a/quarto/data/pl-recall.csv +++ /dev/null @@ -1,10 +0,0 @@ -,Recall,Country,Training Size -0,0.5414285714285715,gb,4096 -1,0.503242542153048,de,4096 -2,0.59765625,pl,4096 -3,0.5414285714285715,gb,8192 -4,0.503242542153048,de,8192 -5,0.768798828125,pl,8192 -6,0.5414285714285715,gb,16384 -7,0.503242542153048,de,16384 -8,0.927490234375,pl,16384 diff --git a/quarto/data/ru-recall.csv b/quarto/data/ru-recall.csv index 816429e..26f90dd 100644 --- a/quarto/data/ru-recall.csv +++ b/quarto/data/ru-recall.csv @@ -1,208 +1,13 @@ ,Recall,Country,Training Size -0,0.6229712858926342,ro,4096 -1,0.50537109375,tr,4096 -2,0.6171875,de,4096 -3,0.58740234375,cz,4096 -4,0.5259911894273128,hu,4096 -5,0.527587890625,cn,4096 -6,0.597412109375,nl,4096 -7,0.636474609375,lv,4096 -8,0.5146484375,es,4096 -9,0.4793187347931874,lk,4096 -10,0.7339331619537275,sg,4096 -11,0.3597014925373134,ar,4096 -12,0.62646484375,ca,4096 -13,0.536865234375,am,4096 -14,0.42748091603053434,cl,4096 -15,0.46285289747399705,vn,4096 -16,0.6318359375,fr,4096 -17,0.5909090909090909,hr,4096 -18,0.627685546875,ru,4096 -19,0.668212890625,jp,4096 -20,0.59619140625,kg,4096 -21,0.6718516302722106,fi,4096 -22,0.6094147582697201,gr,4096 -23,0.61328125,ua,4096 -24,0.629638671875,pt,4096 -25,0.59619140625,us,4096 -26,0.576904296875,th,4096 -27,0.45569620253164556,pe,4096 -28,0.48366013071895425,br,4096 -29,0.7712264150943396,si,4096 -30,0.5617095508068033,at,4096 -31,0.5913838120104439,mx,4096 -32,0.4990234375,kz,4096 -33,0.650390625,cy,4096 -34,0.6926184355490268,au,4096 -35,0.64111328125,se,4096 -36,0.7963525835866262,sa,4096 -37,0.6961883408071748,nz,4096 -38,0.4833029751062538,ch,4096 -39,0.4616709732988803,eg,4096 -40,0.46496815286624205,is,4096 -41,0.598876953125,ee,4096 -42,0.99169921875,mn,4096 -43,0.48381400824014126,id,4096 -44,0.6932173913043478,no,4096 -45,0.7626953125,md,4096 -46,0.556396484375,by,4096 -47,0.465244322092223,in,4096 -48,0.749267475931352,bg,4096 -49,0.534430225472273,ae,4096 -50,0.47265625,rs,4096 -51,0.60498046875,pl,4096 -52,0.57080078125,ge,4096 -53,0.615966796875,il,4096 -54,0.5526315789473685,be,4096 -55,0.4936868686868687,lu,4096 -56,0.57080078125,gb,4096 -57,0.6961577350859454,sk,4096 -58,0.8302439024390244,do,4096 -59,0.9816247582205029,mk,4096 -60,0.5252752752752753,az,4096 -61,0.6196911196911197,ie,4096 -62,0.5421869088157397,me,4096 -63,0.5934579439252337,tj,4096 -64,0.785400390625,uz,4096 -65,0.5724962630792227,dk,4096 -66,0.56689453125,lt,4096 -67,0.7161117717003567,kr,4096 -68,0.748291015625,it,4096 -69,0.616729088639201,ro,8192 -70,0.518798828125,tr,8192 -71,0.622314453125,de,8192 -72,0.598876953125,cz,8192 -73,0.5286343612334802,hu,8192 -74,0.54296875,cn,8192 -75,0.61767578125,nl,8192 -76,0.624267578125,lv,8192 -77,0.53271484375,es,8192 -78,0.4793187347931874,lk,8192 -79,0.7416452442159382,sg,8192 -80,0.3691542288557214,ar,8192 -81,0.630126953125,ca,8192 -82,0.548828125,am,8192 -83,0.42748091603053434,cl,8192 -84,0.4658246656760773,vn,8192 -85,0.628662109375,fr,8192 -86,0.6060606060606061,hr,8192 -87,0.593994140625,ru,8192 -88,0.69091796875,jp,8192 -89,0.59521484375,kg,8192 -90,0.6829195333532755,fi,8192 -91,0.6246819338422391,gr,8192 -92,0.635009765625,ua,8192 -93,0.64453125,pt,8192 -94,0.61181640625,us,8192 -95,0.591796875,th,8192 -96,0.45569620253164556,pe,8192 -97,0.48366013071895425,br,8192 -98,0.7794811320754716,si,8192 -99,0.5673789795028347,at,8192 -100,0.6174934725848564,mx,8192 -101,0.529296875,kz,8192 -102,0.65625,cy,8192 -103,0.702533969886155,au,8192 -104,0.6484375,se,8192 -105,0.7963525835866262,sa,8192 -106,0.6771300448430493,nz,8192 -107,0.49241044323011535,ch,8192 -108,0.4754521963824289,eg,8192 -109,0.46496815286624205,is,8192 -110,0.60107421875,ee,8192 -111,0.9921875,mn,8192 -112,0.5026486168334314,id,8192 -113,0.7005217391304348,no,8192 -114,0.75439453125,md,8192 -115,0.5556640625,by,8192 -116,0.49208534067446663,in,8192 -117,0.7572205943909586,bg,8192 -118,0.5295551492992078,ae,8192 -119,0.48828125,rs,8192 -120,0.60546875,pl,8192 -121,0.56787109375,ge,8192 -122,0.61474609375,il,8192 -123,0.5373514431239389,be,8192 -124,0.4936868686868687,lu,8192 -125,0.561767578125,gb,8192 -126,0.7002022244691608,sk,8192 -127,0.8317073170731707,do,8192 -128,0.9825918762088974,mk,8192 -129,0.529029029029029,az,8192 -130,0.637065637065637,ie,8192 -131,0.5493757094211124,me,8192 -132,0.5887850467289719,tj,8192 -133,0.791748046875,uz,8192 -134,0.57847533632287,dk,8192 -135,0.568115234375,lt,8192 -136,0.7235434007134364,kr,8192 -137,0.74951171875,it,8192 -138,0.616729088639201,ro,16384 -139,0.5244140625,tr,16384 -140,0.631103515625,de,16384 -141,0.596435546875,cz,16384 -142,0.5515418502202644,hu,16384 -143,0.557373046875,cn,16384 -144,0.618408203125,nl,16384 -145,0.642333984375,lv,16384 -146,0.523681640625,es,16384 -147,0.4793187347931874,lk,16384 -148,0.7416452442159382,sg,16384 -149,0.3691542288557214,ar,16384 -150,0.637451171875,ca,16384 -151,0.552978515625,am,16384 -152,0.42748091603053434,cl,16384 -153,0.4658246656760773,vn,16384 -154,0.62353515625,fr,16384 -155,0.6060606060606061,hr,16384 -156,0.609130859375,ru,16384 -157,0.711669921875,jp,16384 -158,0.600830078125,kg,16384 -159,0.6850134609632067,fi,16384 -160,0.6259541984732825,gr,16384 -161,0.65087890625,ua,16384 -162,0.6650390625,pt,16384 -163,0.6142578125,us,16384 -164,0.608154296875,th,16384 -165,0.45569620253164556,pe,16384 -166,0.48366013071895425,br,16384 -167,0.7794811320754716,si,16384 -168,0.5791539467945922,at,16384 -169,0.6135770234986945,mx,16384 -170,0.5390625,kz,16384 -171,0.647216796875,cy,16384 -172,0.7095115681233933,au,16384 -173,0.649658203125,se,16384 -174,0.7963525835866262,sa,16384 -175,0.6782511210762332,nz,16384 -176,0.50394656952034,ch,16384 -177,0.4754521963824289,eg,16384 -178,0.46496815286624205,is,16384 -179,0.602294921875,ee,16384 -180,0.9951171875,mn,16384 -181,0.5026486168334314,id,16384 -182,0.695304347826087,no,16384 -183,0.74755859375,md,16384 -184,0.557861328125,by,16384 -185,0.49208534067446663,in,16384 -186,0.7643365424863959,bg,16384 -187,0.5472273004265692,ae,16384 -188,0.483154296875,rs,16384 -189,0.602294921875,pl,16384 -190,0.585693359375,ge,16384 -191,0.603271484375,il,16384 -192,0.5517826825127334,be,16384 -193,0.4936868686868687,lu,16384 -194,0.579345703125,gb,16384 -195,0.7080384226491405,sk,16384 -196,0.8317073170731707,do,16384 -197,0.9816247582205029,mk,16384 -198,0.5467967967967968,az,16384 -199,0.6472007722007722,ie,16384 -200,0.5622398789254635,me,16384 -201,0.5887850467289719,tj,16384 -202,0.79736328125,uz,16384 -203,0.57847533632287,dk,16384 -204,0.583251953125,lt,16384 -205,0.7199762187871581,kr,16384 -206,0.7470703125,it,16384 +0,0.619140625,ru,4096 +1,0.5751953125,kg,4096 +2,0.49267578125,kz,4096 +3,0.522705078125,by,4096 +4,0.615478515625,ru,8192 +5,0.58349609375,kg,8192 +6,0.522216796875,kz,8192 +7,0.53857421875,by,8192 +8,0.628173828125,ru,16384 +9,0.599609375,kg,16384 +10,0.53955078125,kz,16384 +11,0.550537109375,by,16384 diff --git a/quarto/data/tl-recall.csv b/quarto/data/tl-recall.csv deleted file mode 100644 index 9192a8e..0000000 --- a/quarto/data/tl-recall.csv +++ /dev/null @@ -1,19 +0,0 @@ -,Recall,Country,Training Size -0,0.9784637473079684,ke,4096 -1,0.9625312239800167,za,4096 -2,0.6186094069529653,us,4096 -3,0.9858429858429858,tz,4096 -4,0.9825744800449691,in,4096 -5,0.95849609375,ph,4096 -6,0.9798994974874372,ke,8192 -7,0.9650291423813488,za,8192 -8,0.6186094069529653,us,8192 -9,0.9858429858429858,tz,8192 -10,0.9814502529510961,in,8192 -11,0.96240234375,ph,8192 -12,0.9798994974874372,ke,16384 -13,0.9650291423813488,za,16384 -14,0.6186094069529653,us,16384 -15,0.9858429858429858,tz,16384 -16,0.9814502529510961,in,16384 -17,0.970947265625,ph,16384 diff --git a/quarto/data/tr-recall.csv b/quarto/data/tr-recall.csv index 59df848..97f9582 100644 --- a/quarto/data/tr-recall.csv +++ b/quarto/data/tr-recall.csv @@ -1,19 +1,7 @@ ,Recall,Country,Training Size -0,0.5551948051948052,cy,4096 -1,0.685546875,tr,4096 -2,0.5620437956204379,de,4096 -3,0.5955137481910275,us,4096 -4,0.6060948081264108,gb,4096 -5,0.7715665976535542,az,4096 -6,0.5551948051948052,cy,8192 -7,0.680908203125,tr,8192 -8,0.5605839416058395,de,8192 -9,0.5955137481910275,us,8192 -10,0.6060948081264108,gb,8192 -11,0.7736369910282954,az,8192 -12,0.5551948051948052,cy,16384 -13,0.690185546875,tr,16384 -14,0.5605839416058395,de,16384 -15,0.5955137481910275,us,16384 -16,0.6060948081264108,gb,16384 -17,0.7736369910282954,az,16384 +0,0.5584415584415584,cy,4096 +1,0.758544921875,tr,4096 +2,0.5584415584415584,cy,8192 +3,0.913818359375,tr,8192 +4,0.5584415584415584,cy,16384 +5,0.980224609375,tr,16384 diff --git a/quarto/data/zh-recall.csv b/quarto/data/zh-recall.csv index 3dd75b4..891bf21 100644 --- a/quarto/data/zh-recall.csv +++ b/quarto/data/zh-recall.csv @@ -1,40 +1,13 @@ ,Recall,Country,Training Size -0,0.68701171875,jp,4096 -1,0.758056640625,au,4096 -2,0.6970443349753694,th,4096 -3,0.890625,cn,4096 -4,0.95947265625,tw,4096 -5,0.702392578125,us,4096 -6,0.6246948029298919,gb,4096 -7,0.613037109375,hk,4096 -8,0.6583522297808012,sg,4096 -9,0.7607421875,ca,4096 -10,0.741455078125,my,4096 -11,0.7243032664069523,fr,4096 -12,0.6502827316224445,kr,4096 -13,0.683349609375,jp,8192 -14,0.7421875,au,8192 -15,0.6970443349753694,th,8192 -16,0.89013671875,cn,8192 -17,0.960205078125,tw,8192 -18,0.702392578125,us,8192 -19,0.6246948029298919,gb,8192 -20,0.635009765625,hk,8192 -21,0.654824892920131,sg,8192 -22,0.74853515625,ca,8192 -23,0.73779296875,my,8192 -24,0.7243032664069523,fr,8192 -25,0.6502827316224445,kr,8192 -26,0.69775390625,jp,16384 -27,0.7421875,au,16384 -28,0.6970443349753694,th,16384 -29,0.889892578125,cn,16384 -30,0.9638671875,tw,16384 -31,0.698974609375,us,16384 -32,0.6246948029298919,gb,16384 -33,0.640380859375,hk,16384 -34,0.654824892920131,sg,16384 -35,0.749267578125,ca,16384 -36,0.73779296875,my,16384 -37,0.7243032664069523,fr,16384 -38,0.6502827316224445,kr,16384 +0,0.964599609375,cn,4096 +1,0.968017578125,tw,4096 +2,0.627197265625,hk,4096 +3,0.7228521038044847,sg,4096 +4,0.9677734375,cn,8192 +5,0.96923828125,tw,8192 +6,0.66064453125,hk,8192 +7,0.7195767195767195,sg,8192 +8,0.970703125,cn,16384 +9,0.969970703125,tw,16384 +10,0.660400390625,hk,16384 +11,0.7195767195767195,sg,16384 From f0620511beecd002149dc288ac4900b266608bc5 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 29 May 2024 13:17:04 -0600 Subject: [PATCH 14/15] Missing Country in Spanish --- quarto/data/es-recall.csv | 113 +++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 55 deletions(-) diff --git a/quarto/data/es-recall.csv b/quarto/data/es-recall.csv index 68daba9..dca0c94 100644 --- a/quarto/data/es-recall.csv +++ b/quarto/data/es-recall.csv @@ -1,58 +1,61 @@ ,Recall,Country,Training Size -0,0.563720703125,cr,4096 -1,0.62939453125,cl,4096 -2,0.610107421875,ni,4096 +0,0.5634765625,cr,4096 +1,0.630126953125,cl,4096 +2,0.60986328125,ni,4096 3,0.585693359375,gt,4096 -4,0.60205078125,pe,4096 -5,0.650634765625,do,4096 -6,0.63232421875,ar,4096 -7,0.589599609375,hn,4096 -8,0.625732421875,mx,4096 -9,0.68896484375,cu,4096 -10,0.51708984375,bo,4096 -11,0.680419921875,es,4096 -12,0.609375,uy,4096 -13,0.60595703125,sv,4096 -14,0.63525390625,ve,4096 -15,0.5791015625,pa,4096 +4,0.602783203125,pe,4096 +5,0.651123046875,do,4096 +6,0.6298828125,ar,4096 +7,0.58984375,hn,4096 +8,0.626953125,mx,4096 +9,0.688720703125,cu,4096 +10,0.517578125,bo,4096 +11,0.67919921875,es,4096 +12,0.608642578125,uy,4096 +13,0.605712890625,sv,4096 +14,0.636474609375,ve,4096 +15,0.576904296875,pa,4096 16,0.598388671875,ec,4096 -17,0.601318359375,co,4096 -18,0.60302734375,py,4096 -19,0.5791015625,cr,8192 -20,0.65625,cl,8192 -21,0.624755859375,ni,8192 -22,0.6103515625,gt,8192 -23,0.62646484375,pe,8192 -24,0.673828125,do,8192 -25,0.656005859375,ar,8192 -26,0.620361328125,hn,8192 -27,0.638427734375,mx,8192 -28,0.7060546875,cu,8192 -29,0.541748046875,bo,8192 -30,0.716552734375,es,8192 -31,0.642333984375,uy,8192 -32,0.620849609375,sv,8192 -33,0.64990234375,ve,8192 -34,0.59912109375,pa,8192 -35,0.6015625,ec,8192 -36,0.626953125,co,8192 -37,0.63623046875,py,8192 -38,0.59521484375,cr,16384 -39,0.670166015625,cl,16384 -40,0.647705078125,ni,16384 -41,0.63134765625,gt,16384 -42,0.639892578125,pe,16384 -43,0.700927734375,do,16384 -44,0.680908203125,ar,16384 -45,0.639404296875,hn,16384 -46,0.660888671875,mx,16384 -47,0.737060546875,cu,16384 -48,0.569580078125,bo,16384 -49,0.7421875,es,16384 -50,0.666015625,uy,16384 -51,0.654541015625,sv,16384 -52,0.67138671875,ve,16384 -53,0.6298828125,pa,16384 -54,0.623046875,ec,16384 -55,0.648681640625,co,16384 -56,0.666259765625,py,16384 +17,0.60205078125,co,4096 +18,0.63623046875,gq,4096 +19,0.601318359375,py,4096 +20,0.579833984375,cr,8192 +21,0.656005859375,cl,8192 +22,0.62451171875,ni,8192 +23,0.61181640625,gt,8192 +24,0.626220703125,pe,8192 +25,0.674072265625,do,8192 +26,0.656005859375,ar,8192 +27,0.619140625,hn,8192 +28,0.638427734375,mx,8192 +29,0.70556640625,cu,8192 +30,0.541259765625,bo,8192 +31,0.7158203125,es,8192 +32,0.641845703125,uy,8192 +33,0.62109375,sv,8192 +34,0.650634765625,ve,8192 +35,0.599853515625,pa,8192 +36,0.601806640625,ec,8192 +37,0.62646484375,co,8192 +38,0.664794921875,gq,8192 +39,0.63671875,py,8192 +40,0.595458984375,cr,16384 +41,0.669921875,cl,16384 +42,0.647705078125,ni,16384 +43,0.63037109375,gt,16384 +44,0.64013671875,pe,16384 +45,0.700927734375,do,16384 +46,0.6806640625,ar,16384 +47,0.6396484375,hn,16384 +48,0.6611328125,mx,16384 +49,0.7373046875,cu,16384 +50,0.56884765625,bo,16384 +51,0.742431640625,es,16384 +52,0.66552734375,uy,16384 +53,0.65478515625,sv,16384 +54,0.670166015625,ve,16384 +55,0.630615234375,pa,16384 +56,0.623046875,ec,16384 +57,0.648193359375,co,16384 +58,0.687255859375,gq,16384 +59,0.666015625,py,16384 From 8e114c4e6f44f9b089cf3d03531ab902a4a96080 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Tue, 4 Jun 2024 21:29:54 -0600 Subject: [PATCH 15/15] typo in Ghana --- dialectid/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dialectid/utils.py b/dialectid/utils.py index 26ebf78..0e62804 100644 --- a/dialectid/utils.py +++ b/dialectid/utils.py @@ -35,7 +35,7 @@ 'bm', 'vg', 'cm', # Bermuda, British Virgin Islands, Cameroon 'ca', 'ky', 'ck', # Canada, Cayman Islands, Cook Islands 'dm', 'sz', 'fk', # Dominica, Eswatini, Falkland Islands - 'fj', 'gm', 'gz', # Fiji, Gambia, Ghana + 'fj', 'gm', 'gh', # Fiji, Gambia, Ghana 'gi', 'gd', 'gu', # Gibraltar, Grenada, Guam 'gg', 'gy', 'in', # Guernsey, Guyana, India 'ie', 'im', 'jm', # Ireland, Isle of Man, Jamaica