diff --git a/app/api/files/PDF.ts b/app/api/files/PDF.ts index 4a2c454133..d1620fdcfa 100644 --- a/app/api/files/PDF.ts +++ b/app/api/files/PDF.ts @@ -85,7 +85,7 @@ class PDF extends EventEmitter { ...conversion, ...this.file, language: - detectLanguage(Object.values(conversion.fullTextWithoutPages).join(''), 'franc') || + detectLanguage(Object.values(conversion.fullTextWithoutPages).join(''), 'ISO639_3') || undefined, processed: true, toc: [], diff --git a/app/shared/detectLanguage.ts b/app/shared/detectLanguage.ts index 18678ea89e..025bc08084 100644 --- a/app/shared/detectLanguage.ts +++ b/app/shared/detectLanguage.ts @@ -1,6 +1,6 @@ import franc from 'franc'; -import { language } from 'shared/languagesList'; +import { language, LanguageCode } from 'shared/languagesList'; -const detectLanguage = (text: string, purpose: 'elastic' | 'franc' | 'ISO639_1' = 'elastic') => +const detectLanguage = (text: string, purpose: LanguageCode = 'elastic') => language(franc(text), purpose); export { detectLanguage }; diff --git a/app/shared/languagesList.ts b/app/shared/languagesList.ts index ad07bc7940..aa6f1804d7 100644 --- a/app/shared/languagesList.ts +++ b/app/shared/languagesList.ts @@ -1,7 +1,7 @@ /* eslint-disable max-lines */ import { LanguageSchema } from 'shared/types/commonTypes'; -type LanguageCode = 'elastic' | 'ISO639_3' | 'ISO639_1' | 'franc'; +type LanguageCode = 'elastic' | 'ISO639_3' | 'ISO639_1'; type LegacyElasticObject = Record< string, diff --git a/app/shared/specs/languages.spec.js b/app/shared/specs/languages.spec.js index 97acbb591f..73cb65f749 100644 --- a/app/shared/specs/languages.spec.js +++ b/app/shared/specs/languages.spec.js @@ -50,10 +50,14 @@ describe('languages', () => { expect(detectLanguage('what is the colour of the white horse of santiago', 'ISO639_1')).toBe( 'en' ); - expect(detectLanguage('de que color es el caballo blanco de santiago', 'franc')).toBe('spa'); - expect(detectLanguage('what is the colour of the white horse of santiago', 'franc')).toBe( + expect(detectLanguage('de que color es el caballo blanco de santiago', 'ISO639_3')).toBe( + 'spa' + ); + expect(detectLanguage('what is the colour of the white horse of santiago', 'ISO639_3')).toBe( 'eng' ); + + expect(detectLanguage('Це перевірка', 'ISO639_3')).toBe('ukr'); }); it('should return other when the language is not supported', () => {