Skip to content

Commit

Permalink
feat: update supported languages (#421)
Browse files Browse the repository at this point in the history
* Add many languages

* Added new languages

* Update the languages to add missing ones

* remove extra constant

* Fixed ordering of languages

* Update constants.py
  • Loading branch information
StephanAkkerman authored Nov 12, 2024
1 parent 0dd1329 commit 2165cc5
Showing 1 changed file with 246 additions and 108 deletions.
354 changes: 246 additions & 108 deletions googletrans/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,114 +77,252 @@
}

# Mapping of language code -> lowercase English language name.
#
# NOTE(review): this listing is a diff rendering with the +/- markers
# stripped, so it shows both sides of the change inside one literal:
#   * the single-quoted entries are the pre-change side (108 lines, matching
#     the 108 deletions reported for this commit);
#   * the double-quoted entries are the post-change side (246 lines, matching
#     the 246 additions).
# Evaluated as written, a key that is repeated in a dict literal keeps its
# first insertion position but takes the LAST value given, so for every code
# that appears in both runs the double-quoted name is the effective one.
LANGUAGES = {
# --- pre-change entries (single-quoted) ---
'af': 'afrikaans',
'sq': 'albanian',
'am': 'amharic',
'ar': 'arabic',
'hy': 'armenian',
'az': 'azerbaijani',
'eu': 'basque',
'be': 'belarusian',
'bn': 'bengali',
'bs': 'bosnian',
'bg': 'bulgarian',
'ca': 'catalan',
'ceb': 'cebuano',
'ny': 'chichewa',
'zh-cn': 'chinese (simplified)',
'zh-tw': 'chinese (traditional)',
'co': 'corsican',
'hr': 'croatian',
'cs': 'czech',
'da': 'danish',
'nl': 'dutch',
'en': 'english',
'eo': 'esperanto',
'et': 'estonian',
'tl': 'filipino',
'fi': 'finnish',
'fr': 'french',
'fy': 'frisian',
'gl': 'galician',
'ka': 'georgian',
'de': 'german',
'el': 'greek',
'gu': 'gujarati',
'ht': 'haitian creole',
'ha': 'hausa',
'haw': 'hawaiian',
# Two codes map to the same name "hebrew"; a reverse (name -> code) lookup
# built from this dict can keep only one of them.
'iw': 'hebrew',
'he': 'hebrew',
'hi': 'hindi',
'hmn': 'hmong',
'hu': 'hungarian',
'is': 'icelandic',
'ig': 'igbo',
'id': 'indonesian',
'ga': 'irish',
'it': 'italian',
'ja': 'japanese',
'jw': 'javanese',
'kn': 'kannada',
'kk': 'kazakh',
'km': 'khmer',
'ko': 'korean',
'ku': 'kurdish (kurmanji)',
'ky': 'kyrgyz',
'lo': 'lao',
'la': 'latin',
'lv': 'latvian',
'lt': 'lithuanian',
'lb': 'luxembourgish',
'mk': 'macedonian',
'mg': 'malagasy',
'ms': 'malay',
'ml': 'malayalam',
'mt': 'maltese',
'mi': 'maori',
'mr': 'marathi',
'mn': 'mongolian',
'my': 'myanmar (burmese)',
'ne': 'nepali',
'no': 'norwegian',
'or': 'odia',
'ps': 'pashto',
'fa': 'persian',
'pl': 'polish',
'pt': 'portuguese',
'pa': 'punjabi',
'ro': 'romanian',
'ru': 'russian',
'sm': 'samoan',
'gd': 'scots gaelic',
'sr': 'serbian',
'st': 'sesotho',
'sn': 'shona',
'sd': 'sindhi',
'si': 'sinhala',
'sk': 'slovak',
'sl': 'slovenian',
'so': 'somali',
'es': 'spanish',
'su': 'sundanese',
'sw': 'swahili',
'sv': 'swedish',
'tg': 'tajik',
'ta': 'tamil',
'te': 'telugu',
'th': 'thai',
'tr': 'turkish',
'tk': 'turkmen',
'uk': 'ukrainian',
'ur': 'urdu',
'ug': 'uyghur',
'uz': 'uzbek',
'vi': 'vietnamese',
'cy': 'welsh',
'xh': 'xhosa',
'yi': 'yiddish',
'yo': 'yoruba',
'zu': 'zulu',
# --- post-change entries (double-quoted), ordered alphabetically by name ---
"abk": "abkhaz",
"ace": "acehnese",
"ach": "acholi",
"aar": "afar",
"af": "afrikaans",
"sq": "albanian",
"alz": "alur",
"am": "amharic",
"ar": "arabic",
"hy": "armenian",
"as": "assamese",
"ava": "avar",
"awa": "awadhi",
"ay": "aymara",
"az": "azerbaijani",
"ban": "balinese",
"bal": "baluchi",
"bm": "bambara",
"bci": "baoulé",
"bak": "bashkir",
"eu": "basque",
"btx": "batak karo",
"bts": "batak simalungun",
"bbc": "batak toba",
"be": "belarusian",
"bem": "bemba",
"bn": "bengali",
"bew": "betawi",
"bho": "bhojpuri",
"bik": "bikol",
"bs": "bosnian",
"bre": "breton",
"bg": "bulgarian",
"bua": "buryat",
"yue": "cantonese",
"ca": "catalan",
"ceb": "cebuano",
"cha": "chamorro",
"che": "chechen",
"zh": "chinese",
"zh-cn": "chinese (simplified)",
"zh-tw": "chinese (traditional)",
"chk": "chuukese",
"chv": "chuvash",
"co": "corsican",
"crh": "crimean tatar",
"hr": "croatian",
"cs": "czech",
"da": "danish",
"fa-af": "dari",
"dv": "dhivehi",
"din": "dinka",
"doi": "dogri",
"dom": "dombe",
"nl": "dutch",
"dyu": "dyula",
"dzo": "dzongkha",
"en": "english",
"eo": "esperanto",
"et": "estonian",
"fao": "faroese",
"fij": "fijian",
"fil": "filipino (tagalog)",
"fi": "finnish",
"fon": "fon",
"fr": "french",
"fy": "frisian",
"fur": "friulian",
"ful": "fulani",
"gaa": "ga",
"gl": "galician",
"ka": "georgian",
"de": "german",
"el": "greek",
"gn": "guarani",
"gu": "gujarati",
"ht": "haitian creole",
"cnh": "hakha chin",
"ha": "hausa",
"haw": "hawaiian",
# "he" and "iw" both map to "hebrew" (see the reverse-lookup caveat above).
"he": "hebrew",
"iw": "hebrew",
"hil": "hiligaynon",
"hi": "hindi",
"hmn": "hmong",
"hu": "hungarian",
"hrx": "hunsrik",
"iba": "iban",
"is": "icelandic",
"ig": "igbo",
"ilo": "ilocano",
"id": "indonesian",
"ga": "irish",
"it": "italian",
"jam": "jamaican patois",
"ja": "japanese",
# "jv" and "jw" both map to "javanese"; same reverse-lookup caveat as hebrew.
"jv": "javanese",
"jw": "javanese",
"kac": "jingpo",
"kal": "kalaallisut",
"kn": "kannada",
"kau": "kanuri",
"pam": "kapampangan",
"kk": "kazakh",
"kha": "khasi",
"km": "khmer",
"cgg": "kiga",
"kik": "kikongo",
"rw": "kinyarwanda",
"ktu": "kituba",
"trp": "kokborok",
"kom": "komi",
"gom": "konkani",
"ko": "korean",
"kri": "krio",
"ku": "kurdish",
"ckb": "kurdish (sorani)",
"ky": "kyrgyz",
"lo": "lao",
"ltg": "latgalian",
"la": "latin",
"lv": "latvian",
"lij": "ligurian",
"lim": "limburgish",
"ln": "lingala",
"lt": "lithuanian",
"lmo": "lombard",
"lg": "luganda",
"luo": "luo",
"lb": "luxembourgish",
"mk": "macedonian",
"mad": "madurese",
"mai": "maithili",
"mak": "makassar",
"mg": "malagasy",
"ms": "malay",
"ms-arab": "malay (jawi)",
"ml": "malayalam",
"mt": "maltese",
"mam": "mam",
"glv": "manx",
"mi": "maori",
"mr": "marathi",
"mah": "marshallese",
"mwr": "marwadi",
"mfe": "mauritian creole",
"mhr": "meadow mari",
"mni-mtei": "meiteilon (manipuri)",
"min": "minang",
"lus": "mizo",
"mn": "mongolian",
"my": "myanmar (burmese)",
"nhe": "nahuatl (eastern huasteca)",
"ndc-zw": "ndau",
"nde": "ndebele (south)",
"new": "nepalbhasa (newari)",
"ne": "nepali",
# NOTE(review): the N'Ko entry below is deliberately commented out; the
# reason is not visible in this file — confirm before re-enabling or removing.
#'bm-nkoo': 'nko',
"no": "norwegian",
"nus": "nuer",
"ny": "nyanja (chichewa)",
"oci": "occitan",
"or": "odia (oriya)",
"om": "oromo",
"oss": "ossetian",
"pag": "pangasinan",
"pap": "papiamento",
"ps": "pashto",
"fa": "persian",
"pl": "polish",
"por": "portuguese (portugal)",
"pt": "portuguese (portugal, brazil)",
"pa": "punjabi",
"pa-arab": "punjabi (shahmukhi)",
"kek": "q'eqchi'",
"qu": "quechua",
"rom": "romani",
"ro": "romanian",
"run": "rundi",
"ru": "russian",
"sme": "sami (north)",
"sm": "samoan",
"sag": "sango",
"sa": "sanskrit",
"sat": "santali",
"gd": "scots gaelic",
"nso": "sepedi",
"sr": "serbian",
"st": "sesotho",
"crs": "seychellois creole",
"shn": "shan",
"sn": "shona",
"scn": "sicilian",
"szl": "silesian",
"sd": "sindhi",
"si": "sinhala (sinhalese)",
"sk": "slovak",
"sl": "slovenian",
"so": "somali",
"es": "spanish",
"su": "sundanese",
"sus": "susu",
"sw": "swahili",
"ssw": "swati",
"sv": "swedish",
"tl": "tagalog (filipino)",
"tah": "tahitian",
"tg": "tajik",
"ber-atn": "tamazight",
"ber": "tamazight (tifinagh)",
"ta": "tamil",
"tt": "tatar",
"te": "telugu",
"tet": "tetum",
"th": "thai",
"bod": "tibetan",
"ti": "tigrinya",
"tiv": "tiv",
"tpi": "tok pisin",
"ton": "tongan",
"ts": "tsonga",
"tsn": "tswana",
"tcy": "tulu",
"tum": "tumbuka",
"tr": "turkish",
"tk": "turkmen",
# NOTE(review): in ISO 639-2 "tuk" is the code for Turkmen, while Tuvan is
# "tyv" — confirm which code the translation backend expects for Tuvan.
"tuk": "tuvan",
"ak": "twi (akan)",
"udm": "udmurt",
"uk": "ukrainian",
"ur": "urdu",
"ug": "uyghur",
"uz": "uzbek",
"ven": "venda",
"vec": "venetian",
"vi": "vietnamese",
"war": "waray",
"cy": "welsh",
"wol": "wolof",
"xh": "xhosa",
"sah": "yakut",
"yi": "yiddish",
"yo": "yoruba",
"yua": "yucatec maya",
"zap": "zapotec",
"zu": "zulu",
}

# Reverse lookup: language name -> language code, built by inverting LANGUAGES.
# Names are not unique (e.g. "hebrew" and "javanese" each have two codes), so
# for a shared name the code that comes later in LANGUAGES' iteration order is
# the one that is kept.
LANGCODES = {name: code for code, name in LANGUAGES.items()}
Expand Down

0 comments on commit 2165cc5

Please sign in to comment.