Skip to content

Commit

Permalink
Merge pull request #508 from stonefruit/add-language-constant
Browse files Browse the repository at this point in the history
Add languages constant for languages with traineddata
  • Loading branch information
jeromewu committed Dec 15, 2020
2 parents 7b7f9af + 8f2c33f commit 90466c3
Show file tree
Hide file tree
Showing 2 changed files with 220 additions and 0 deletions.
218 changes: 218 additions & 0 deletions src/constants/languages.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/*
* Language codes for which Tesseract traineddata files exist.
* See: https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
*/

/**
* @typedef {object} Languages
* @property {string} AFR Afrikaans
* @property {string} AMH Amharic
* @property {string} ARA Arabic
* @property {string} ASM Assamese
* @property {string} AZE Azerbaijani
* @property {string} AZE_CYRL Azerbaijani - Cyrillic
* @property {string} BEL Belarusian
* @property {string} BEN Bengali
* @property {string} BOD Tibetan
* @property {string} BOS Bosnian
* @property {string} BUL Bulgarian
* @property {string} CAT Catalan; Valencian
* @property {string} CEB Cebuano
* @property {string} CES Czech
* @property {string} CHI_SIM Chinese - Simplified
* @property {string} CHI_TRA Chinese - Traditional
* @property {string} CHR Cherokee
* @property {string} CYM Welsh
* @property {string} DAN Danish
* @property {string} DEU German
* @property {string} DZO Dzongkha
* @property {string} ELL Greek, Modern (1453-)
* @property {string} ENG English
* @property {string} ENM English, Middle (1100-1500)
* @property {string} EPO Esperanto
* @property {string} EST Estonian
* @property {string} EUS Basque
* @property {string} FAS Persian
* @property {string} FIN Finnish
* @property {string} FRA French
* @property {string} FRK German Fraktur
* @property {string} FRM French, Middle (ca. 1400-1600)
* @property {string} GLE Irish
* @property {string} GLG Galician
* @property {string} GRC Greek, Ancient (-1453)
* @property {string} GUJ Gujarati
* @property {string} HAT Haitian; Haitian Creole
* @property {string} HEB Hebrew
* @property {string} HIN Hindi
* @property {string} HRV Croatian
* @property {string} HUN Hungarian
* @property {string} IKU Inuktitut
* @property {string} IND Indonesian
* @property {string} ISL Icelandic
* @property {string} ITA Italian
* @property {string} ITA_OLD Italian - Old
* @property {string} JAV Javanese
* @property {string} JPN Japanese
* @property {string} KAN Kannada
* @property {string} KAT Georgian
* @property {string} KAT_OLD Georgian - Old
* @property {string} KAZ Kazakh
* @property {string} KHM Central Khmer
* @property {string} KIR Kirghiz; Kyrgyz
* @property {string} KOR Korean
* @property {string} KUR Kurdish
* @property {string} LAO Lao
* @property {string} LAT Latin
* @property {string} LAV Latvian
* @property {string} LIT Lithuanian
* @property {string} MAL Malayalam
* @property {string} MAR Marathi
* @property {string} MKD Macedonian
* @property {string} MLT Maltese
* @property {string} MSA Malay
* @property {string} MYA Burmese
* @property {string} NEP Nepali
* @property {string} NLD Dutch; Flemish
* @property {string} NOR Norwegian
* @property {string} ORI Oriya
* @property {string} PAN Panjabi; Punjabi
* @property {string} POL Polish
* @property {string} POR Portuguese
* @property {string} PUS Pushto; Pashto
* @property {string} RON Romanian; Moldavian; Moldovan
* @property {string} RUS Russian
* @property {string} SAN Sanskrit
* @property {string} SIN Sinhala; Sinhalese
* @property {string} SLK Slovak
* @property {string} SLV Slovenian
* @property {string} SPA Spanish; Castilian
* @property {string} SPA_OLD Spanish; Castilian - Old
* @property {string} SQI Albanian
* @property {string} SRP Serbian
* @property {string} SRP_LATN Serbian - Latin
* @property {string} SWA Swahili
* @property {string} SWE Swedish
* @property {string} SYR Syriac
* @property {string} TAM Tamil
* @property {string} TEL Telugu
* @property {string} TGK Tajik
* @property {string} TGL Tagalog
* @property {string} THA Thai
* @property {string} TIR Tigrinya
* @property {string} TUR Turkish
* @property {string} UIG Uighur; Uyghur
* @property {string} UKR Ukrainian
* @property {string} URD Urdu
* @property {string} UZB Uzbek
* @property {string} UZB_CYRL Uzbek - Cyrillic
* @property {string} VIE Vietnamese
* @property {string} YID Yiddish
*/

/**
* @type {Languages}
*/
module.exports = {
AFR: 'afr',
AMH: 'amh',
ARA: 'ara',
ASM: 'asm',
AZE: 'aze',
AZE_CYRL: 'aze_cyrl',
BEL: 'bel',
BEN: 'ben',
BOD: 'bod',
BOS: 'bos',
BUL: 'bul',
CAT: 'cat',
CEB: 'ceb',
CES: 'ces',
CHI_SIM: 'chi_sim',
CHI_TRA: 'chi_tra',
CHR: 'chr',
CYM: 'cym',
DAN: 'dan',
DEU: 'deu',
DZO: 'dzo',
ELL: 'ell',
ENG: 'eng',
ENM: 'enm',
EPO: 'epo',
EST: 'est',
EUS: 'eus',
FAS: 'fas',
FIN: 'fin',
FRA: 'fra',
FRK: 'frk',
FRM: 'frm',
GLE: 'gle',
GLG: 'glg',
GRC: 'grc',
GUJ: 'guj',
HAT: 'hat',
HEB: 'heb',
HIN: 'hin',
HRV: 'hrv',
HUN: 'hun',
IKU: 'iku',
IND: 'ind',
ISL: 'isl',
ITA: 'ita',
ITA_OLD: 'ita_old',
JAV: 'jav',
JPN: 'jpn',
KAN: 'kan',
KAT: 'kat',
KAT_OLD: 'kat_old',
KAZ: 'kaz',
KHM: 'khm',
KIR: 'kir',
KOR: 'kor',
KUR: 'kur',
LAO: 'lao',
LAT: 'lat',
LAV: 'lav',
LIT: 'lit',
MAL: 'mal',
MAR: 'mar',
MKD: 'mkd',
MLT: 'mlt',
MSA: 'msa',
MYA: 'mya',
NEP: 'nep',
NLD: 'nld',
NOR: 'nor',
ORI: 'ori',
PAN: 'pan',
POL: 'pol',
POR: 'por',
PUS: 'pus',
RON: 'ron',
RUS: 'rus',
SAN: 'san',
SIN: 'sin',
SLK: 'slk',
SLV: 'slv',
SPA: 'spa',
SPA_OLD: 'spa_old',
SQI: 'sqi',
SRP: 'srp',
SRP_LATN: 'srp_latn',
SWA: 'swa',
SWE: 'swe',
SYR: 'syr',
TAM: 'tam',
TEL: 'tel',
TGK: 'tgk',
TGL: 'tgl',
THA: 'tha',
TIR: 'tir',
TUR: 'tur',
UIG: 'uig',
UKR: 'ukr',
URD: 'urd',
UZB: 'uzb',
UZB_CYRL: 'uzb_cyrl',
VIE: 'vie',
YID: 'yid',
};
2 changes: 2 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ require('regenerator-runtime/runtime');
const createScheduler = require('./createScheduler');
const createWorker = require('./createWorker');
const Tesseract = require('./Tesseract');
const languages = require('./constants/languages');
const OEM = require('./constants/OEM');
const PSM = require('./constants/PSM');
const { setLogging } = require('./utils/log');

module.exports = {
languages,
OEM,
PSM,
createScheduler,
Expand Down

0 comments on commit 90466c3

Please sign in to comment.