Skip to content

Commit

Permalink
fix mapping from detectLanguage service
Browse files — browse the repository at this point in the history
  • Loading branch information
Joao-vi committed Dec 2, 2024
1 parent 77db066 commit 2cffcf6
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 6 deletions.
2 changes: 1 addition & 1 deletion app/api/files/PDF.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class PDF extends EventEmitter {
...conversion,
...this.file,
language:
detectLanguage(Object.values(conversion.fullTextWithoutPages).join(''), 'franc') ||
detectLanguage(Object.values(conversion.fullTextWithoutPages).join(''), 'ISO639_3') ||
undefined,
processed: true,
toc: [],
Expand Down
4 changes: 2 additions & 2 deletions app/shared/detectLanguage.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import franc from 'franc';
import { language } from 'shared/languagesList';
import { language, LanguageCode } from 'shared/languagesList';

const detectLanguage = (text: string, purpose: 'elastic' | 'franc' | 'ISO639_1' = 'elastic') =>
const detectLanguage = (text: string, purpose: LanguageCode = 'elastic') =>
language(franc(text), purpose);
export { detectLanguage };
2 changes: 1 addition & 1 deletion app/shared/languagesList.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* eslint-disable max-lines */
import { LanguageSchema } from 'shared/types/commonTypes';

type LanguageCode = 'elastic' | 'ISO639_3' | 'ISO639_1' | 'franc';
type LanguageCode = 'elastic' | 'ISO639_3' | 'ISO639_1';

type LegacyElasticObject = Record<
string,
Expand Down
8 changes: 6 additions & 2 deletions app/shared/specs/languages.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@ describe('languages', () => {
expect(detectLanguage('what is the colour of the white horse of santiago', 'ISO639_1')).toBe(
'en'
);
expect(detectLanguage('de que color es el caballo blanco de santiago', 'franc')).toBe('spa');
expect(detectLanguage('what is the colour of the white horse of santiago', 'franc')).toBe(
expect(detectLanguage('de que color es el caballo blanco de santiago', 'ISO639_3')).toBe(
'spa'
);
expect(detectLanguage('what is the colour of the white horse of santiago', 'ISO639_3')).toBe(
'eng'
);

expect(detectLanguage('Це перевірка', 'ISO639_3')).toBe('ukr');
});

it('should return other when the language is not supported', () => {
Expand Down

0 comments on commit 2cffcf6

Please sign in to comment.