Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor language list #7516

Open
wants to merge 14 commits into
base: production
Choose a base branch
from
2 changes: 1 addition & 1 deletion app/api/activitylog/helpers.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { typeParsers } from 'api/activitylog/migrationsParser';
import templates from 'api/templates/templates';
import entities from 'api/entities/entities';
Expand Down
2 changes: 1 addition & 1 deletion app/api/csv/importThesauri.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { createError } from 'api/utils';
import csvtojson from 'csvtojson';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { ensure } from 'shared/tsUtils';
import { LanguageSchema } from 'shared/types/commonTypes';
import { ThesaurusValueSchema } from 'shared/types/thesaurusType';
Expand Down
2 changes: 1 addition & 1 deletion app/api/entities.v2/types/EntityInputDataSchema.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ISO6391Codes } from 'shared/languagesList';
import { ISO6391Codes } from 'shared/language';

const linkSchema = {
type: 'object',
Expand Down
2 changes: 1 addition & 1 deletion app/api/i18n/defaultTranslations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { readFile, readdir } from 'fs/promises';

import { CSVLoader } from 'api/csv';
import { objectIndex } from 'shared/data_utils/objectIndex';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';

const availableLanguagesByKey = objectIndex(
availableLanguages,
Expand Down
2 changes: 1 addition & 1 deletion app/api/i18n/specs/routes.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import settings from 'api/settings';
import { getFixturesFactory } from 'api/utils/fixturesFactory';
import { testingEnvironment } from 'api/utils/testingEnvironment';
import { TestEmitSources, iosocket, setUpApp } from 'api/utils/testingRoutes';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { LanguageSchema } from 'shared/types/commonTypes';
import { UserRole } from 'shared/types/userSchema';
import { DefaultTranslations } from '../defaultTranslations';
Expand Down
2 changes: 1 addition & 1 deletion app/api/i18n/translations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { TranslationContext, TranslationType, TranslationValue } from 'shared/tr
// eslint-disable-next-line node/no-restricted-import
import { createWriteStream } from 'fs';
import { ObjectId } from 'mongodb';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { ContextType } from 'shared/translationSchema';
import { LanguageISO6391 } from 'shared/types/commonTypes';
import { pipeline } from 'stream/promises';
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* eslint-disable no-await-in-loop */
import languages from 'shared/languages';
import languages from './languages';

const getDefaultLanguage = async db => {
const settings = await db.collection('settings').find().toArray();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/** Shape of a single entry in the language table below. */
type ElasticLanguage = { franc: string; elastic: string; ISO639_1: string | null };

// Each row is [franc, elastic, ISO639_1]. The franc value doubles as the lookup key,
// and ISO639_1 is null for the rows that carry no two-letter code (cjk, ckb).
const languageRows: ReadonlyArray<readonly [string, string, string | null]> = [
  ['arb', 'arabic', 'ar'],
  ['bul', 'bulgarian', 'bg'],
  ['cat', 'catalan', 'ca'],
  ['cjk', 'cjk', null],
  ['ckb', 'sorani', null],
  ['ces', 'czech', 'cs'],
  ['dan', 'danish', 'da'],
  ['deu', 'german', 'de'],
  ['ell', 'greek', 'el'],
  ['eng', 'english', 'en'],
  ['eus', 'basque', 'eu'],
  ['fas', 'persian', 'fa'],
  ['fin', 'finnish', 'fi'],
  ['fra', 'french', 'fr'],
  ['gle', 'irish', 'ga'],
  ['glg', 'galician', 'gl'],
  ['hin', 'hindi', 'hi'],
  ['hun', 'hungarian', 'hu'],
  ['hye', 'armenian', 'hy'],
  ['ind', 'indonesian', 'id'],
  ['ita', 'italian', 'it'],
  ['lav', 'latvian', 'lv'],
  ['lit', 'lithuanian', 'lt'],
  ['nld', 'dutch', 'nl'],
  ['nno', 'norwegian', 'nn'],
  ['nob', 'norwegian', 'nb'],
  ['por', 'portuguese', 'pt'],
  ['ron', 'romanian', 'ro'],
  ['rus', 'russian', 'ru'],
  ['spa', 'spanish', 'es'],
  ['swe', 'swedish', 'sv'],
  ['tha', 'thai', 'th'],
  ['tur', 'turkish', 'tr'],
];

/**
 * Language table keyed by the three-letter code used as each entry's `franc` value.
 * Keys are inserted in row order, so iteration order matches the list above.
 */
const elasticLanguages: { [index: string]: ElasticLanguage } = {};
languageRows.forEach(([franc, elastic, ISO639_1]) => {
  elasticLanguages[franc] = { franc, elastic, ISO639_1 };
});

export { elasticLanguages };
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import { elasticLanguages } from './languageList';

// Exposes every entry of the language table as a flat list under `data`,
// in the same order as the table's keys.
const data = Object.values(elasticLanguages);

export default { data };
4 changes: 2 additions & 2 deletions app/api/search/entitiesIndex.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { detectLanguage } from 'shared/detectLanguage';
import { language as languages } from 'shared/languagesList';
import entities from 'api/entities';
import { legacyLogger } from 'api/log';
import { entityDefaultDocument } from 'shared/entityDefaultDocument';
Expand All @@ -8,6 +7,7 @@ import { ElasticEntityMapper } from 'api/entities.v2/database/ElasticEntityMappe
import { MongoTemplatesDataSource } from 'api/templates.v2/database/MongoTemplatesDataSource';
import { getConnection } from 'api/common.v2/database/getConnectionForCurrentTenant';
import { MongoSettingsDataSource } from 'api/settings.v2/database/MongoSettingsDataSource';
import { LanguageUtils } from 'shared/language';
import { DefaultTransactionManager } from 'api/common.v2/database/data_source_defaults';
import elasticMapping from '../../../database/elastic_mapping/elastic_mapping';
import elasticMapFactory from '../../../database/elastic_mapping/elasticMapFactory';
Expand Down Expand Up @@ -50,7 +50,7 @@ function setFullTextSettings(defaultDocument, id, body, doc) {
language = detectLanguage(fullText);
}
if (defaultDocument.language) {
language = languages(defaultDocument.language);
language = LanguageUtils.fromISO639_3(defaultDocument.language).elastic;
}
const fullTextObject = {
[`fullText_${language}`]: fullText,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import settings from 'api/settings/settings';
import templatesModel from 'api/templates/templates';
import dictionatiesModel from 'api/thesauri/dictionariesModel';
import request from 'shared/JSONRequest';
import languages from 'shared/languages';
import { EntitySchema } from 'shared/types/entityType';
import { ExtractedMetadataSchema, ObjectIdSchema, PropertySchema } from 'shared/types/commonTypes';
import { ModelStatus } from 'shared/types/IXModelSchema';
Expand All @@ -36,6 +35,7 @@ import {
} from 'api/services/informationextraction/getFiles';
import { Suggestions } from 'api/suggestions/suggestions';
import { IXExtractorType } from 'shared/types/extractorType';
import { LanguageUtils } from 'shared/language';
import { IXModelType } from 'shared/types/IXModelType';
import { ParagraphSchema } from 'shared/types/segmentationType';
import ixmodels from './ixmodels';
Expand Down Expand Up @@ -174,7 +174,8 @@ class InformationExtraction {
file: FileWithAggregation,
_data: CommonMaterialsData
): MaterialsData => {
const languageIso = languages.get(file.language!, 'ISO639_1') || defaultTrainingLanguage;
const languageIso =
LanguageUtils.fromISO639_3(file.language!, false)?.ISO639_1 || defaultTrainingLanguage;

let data: MaterialsData = { ..._data, language_iso: languageIso };

Expand Down Expand Up @@ -257,7 +258,7 @@ class InformationExtraction {
_getEntityFromFile = async (file: EnforcedWithId<FileType> | FileWithAggregation) => {
let [entity] = await entities.getUnrestricted({
sharedId: file.entity,
language: languages.get(file.language!, 'ISO639_1'),
language: LanguageUtils.fromISO639_3(file.language!)?.ISO639_1,
});

if (!entity) {
Expand Down Expand Up @@ -346,7 +347,7 @@ class InformationExtraction {
...existingSuggestions,
entityId: entity.sharedId!,
fileId: file._id,
language: languages.get(file.language, 'ISO639_1') || 'other',
language: LanguageUtils.fromISO639_3(file.language)?.ISO639_1 || 'other',
extractorId: extractor._id,
propertyName: extractor.property,
status: 'processing',
Expand Down
4 changes: 2 additions & 2 deletions app/api/services/informationextraction/getFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ import { objectIndex } from 'shared/data_utils/objectIndex';
import settings from 'api/settings/settings';
import templatesModel from 'api/templates/templates';
import { propertyTypes } from 'shared/propertyTypes';
import languages from 'shared/languages';
import { ensure } from 'shared/tsUtils';
import { LanguageUtils } from 'shared/language';

const BATCH_SIZE = 50;
const MAX_TRAINING_FILES_NUMBER = 2000;
Expand Down Expand Up @@ -202,7 +202,7 @@ async function getFilesForTraining(templates: ObjectIdSchema[], property: string
const defaultLang = (await settings.getDefaultLanguage())?.key;

const filesWithEntityValue = files.map(file => {
const fileLang = languages.get(file.language, 'ISO639_1') || defaultLang;
const fileLang = LanguageUtils.fromISO639_3(file.language, false)?.ISO639_1 || defaultLang;
const entity = indexedEntities[file.entity + fileLang];
if (!entity?.metadata || !entity?.metadata[property]?.length) {
return { ...file, propertyType };
Expand Down
6 changes: 3 additions & 3 deletions app/api/services/ocr/OcrManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import settings from 'api/settings/settings';
import { emitToTenant } from 'api/socketio/setupSockets';
import { tenants } from 'api/tenants/tenantContext';
import createError from 'api/utils/Error';
import { LanguageUtils } from 'shared/language';
import { handleError } from 'api/utils/handleError';
// eslint-disable-next-line node/no-restricted-import
import { createReadStream, createWriteStream } from 'fs';
import request from 'shared/JSONRequest';
import { language as getLanguage } from 'shared/languagesList';
import { FileType } from 'shared/types/fileType';
import { Readable } from 'stream';
import { pipeline } from 'stream/promises';
Expand Down Expand Up @@ -146,7 +146,7 @@ const processResults = async (message: ResultsMessage): Promise<void> => {
const validateLanguage = async (language: string, ocrSettings?: { url: string }) => {
const _ocrSettings = ocrSettings || (await getSettings());
const supportedLanguages = await fetchSupportedLanguages(_ocrSettings);
return supportedLanguages.includes(getLanguage(language, 'ISO639_1')!);
return supportedLanguages.includes(LanguageUtils.fromISO639_3(language)?.ISO639_1!);
};

const getStatus = async (file: EnforcedWithId<FileType>) => {
Expand Down Expand Up @@ -219,7 +219,7 @@ class OcrManager {
tenant: tenant.name,
params: {
filename: file.filename,
language: getLanguage(file.language!, 'ISO639_1'),
language: LanguageUtils.fromISO639_3(file.language!)?.ISO639_1,
},
});

Expand Down
4 changes: 2 additions & 2 deletions app/api/suggestions/blankSuggestions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import { files } from 'api/files';
import { EnforcedWithId } from 'api/odm';
import settings from 'api/settings';
import { propertyTypeIsMultiValued } from 'api/services/informationextraction/getFiles';
import languages from 'shared/languages';
import { ObjectIdSchema } from 'shared/types/commonTypes';
import { IXExtractorType } from 'shared/types/extractorType';
import { FileType } from 'shared/types/fileType';
import { IXSuggestionType } from 'shared/types/suggestionType';
import { Suggestions } from './suggestions';
import templates from 'api/templates';
import { LanguageUtils } from 'shared/language';

const fetchEntitiesBatch = async (query: any, limit: number = 100) =>
entitiesModel.db.find(query).select('sharedId').limit(limit).sort({ _id: 1 }).lean();
Expand Down Expand Up @@ -49,7 +49,7 @@ export const getBlankSuggestion = (
defaultLanguage: string
) => ({
language: file.language
? languages.get(file.language, 'ISO639_1') || defaultLanguage
? LanguageUtils.fromISO639_3(file.language, false)?.ISO639_1 || defaultLanguage
: defaultLanguage,
fileId: file._id,
entityId: file.entity!,
Expand Down
4 changes: 2 additions & 2 deletions app/react/App/Root.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import PropTypes from 'prop-types';
import React, { Component } from 'react';
import serialize from 'serialize-javascript';

import { availableLanguages as languagesList } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';

const determineHotAssets = query => ({
JS: [
Expand Down Expand Up @@ -87,7 +87,7 @@ class Root extends Component {
const isHotReload = process.env.HOT;
const { head, language, assets, reduxData, content } = this.props;

const languageData = languagesList.find(l => l.key === language);
const languageData = availableLanguages.find(l => l.key === language);
const query = languageData && languageData.rtl ? '?rtl=true' : '';

const { JS, CSS } = isHotReload
Expand Down
8 changes: 4 additions & 4 deletions app/react/Attachments/components/AttachmentForm.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@ import { connect } from 'react-redux';
import { Form, Field } from 'react-redux-form';

import { FormGroup, Select } from 'app/ReactReduxForms';
import { elasticLanguages } from 'shared/languagesList';
import { elasticLanguages } from 'shared/language';
import t from 'app/I18N/t';
import ShowIf from 'app/App/ShowIf';

export class AttachmentForm extends Component {
render() {
const { model } = this.props;
const validators = { originalname: { required: val => !!val && val.trim() !== '' } };
const languageOptions = Object.keys(elasticLanguages).map(key => ({
value: elasticLanguages[key].franc,
label: elasticLanguages[key].elastic,
const languageOptions = elasticLanguages.map(language => ({
value: language.ISO639_3,
label: language.elastic,
}));
languageOptions.push({ value: 'other', label: 'other' });

Expand Down
6 changes: 4 additions & 2 deletions app/react/Attachments/components/File.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { wrapDispatch } from 'app/Multireducer';
import { TocGeneratedLabel } from 'app/ToggledFeatures/tocGeneration';
import { NeedAuthorization } from 'app/Auth';
import { LocalForm } from 'app/Forms/Form';
import { availableLanguages, getLanguageSchema } from 'shared/languagesList';
import { availableLanguages, LanguageUtils } from 'shared/language';
import { isBlobFile } from 'shared/tsUtils';
import { EntitySchema } from 'shared/types/entityType';
import { FileType } from 'shared/types/fileType';
Expand Down Expand Up @@ -105,7 +105,9 @@ class File extends Component<FileOwnProps, FileState> {
<div>
<div>
<span className="badge">
<Translate>{language ? getLanguageSchema(language)?.label || '' : ''}</Translate>
<Translate>
{language ? LanguageUtils.fromISO639_3(language)?.label || '' : ''}
</Translate>
</span>
<TocGeneratedLabel file={this.props.file}>
<Translate>ML TOC</Translate>
Expand Down
4 changes: 2 additions & 2 deletions app/react/Attachments/components/FileList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import { Translate } from 'app/I18N';
import { FileType } from 'shared/types/fileType';
import { EntitySchema } from 'shared/types/entityType';
import UploadButton from 'app/Metadata/components/UploadButton';
import { LanguageUtils } from 'shared/language';
import { NeedAuthorization } from 'app/Auth';
import languageLib from 'shared/languages';
import { ConnectedFile as File } from './File';
import './scss/filelist.scss';

Expand All @@ -28,7 +28,7 @@ export type FileListProps = {
const orderFilesByLanguage = (files: FileType[], systemLanguage: string) => {
const orderedFiles = [...files];
const fileIndex = orderedFiles.findIndex(file => {
const language = languageLib.get(file.language as string, 'ISO639_1');
const language = LanguageUtils.fromISO639_3(file.language as string)?.ISO639_1;
return language === systemLanguage;
});
if (fileIndex > -1) {
Expand Down
6 changes: 3 additions & 3 deletions app/react/Attachments/components/specs/FileList.spec.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import React from 'react';
import { shallow, ShallowWrapper } from 'enzyme';
import { FileType } from 'shared/types/fileType';
import UploadButton from 'app/Metadata/components/UploadButton';
import { LanguageUtils } from 'shared/language';
import { EntitySchema } from 'shared/types/entityType';
import languageLib from 'shared/languages';
import { ConnectedFile as File } from '../File';
import { FileList, FileListProps } from '../FileList';

Expand Down Expand Up @@ -49,7 +49,7 @@ describe('FileList', () => {
expect(renderedFiles.at(0).props().file).toBe(file);
expect(renderedFiles.at(1).props().file).toBe(file2);
const firstFile = renderedFiles.at(0).props().file;
const language = languageLib.get(firstFile.language as string, 'ISO639_1');
const language = LanguageUtils.fromISO639_3(firstFile.language as string)?.ISO639_1;
expect(entity.language).toEqual(language);
});

Expand All @@ -58,7 +58,7 @@ describe('FileList', () => {
render();
const renderedFiles = component.find(File);
const firstFile = renderedFiles.at(0).props().file;
const language = languageLib.get(firstFile.language as string, 'ISO639_1');
const language = LanguageUtils.fromISO639_3(firstFile.language as string)?.ISO639_1;
expect(entity.language).toEqual(language);
});

Expand Down
6 changes: 3 additions & 3 deletions app/react/Layout/DocumentLanguage.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import PropTypes from 'prop-types';
import React, { Component } from 'react';
import { connect } from 'react-redux';

import { language as getLanguage } from 'shared/languagesList';
import { LanguageUtils } from 'shared/language';
import t from '../I18N/t';

export class DocumentLanguage extends Component {
Expand All @@ -16,13 +16,13 @@ export class DocumentLanguage extends Component {
if (doc.get('file')) {
const fileLanguage = doc.getIn(['file', 'language']);
if (fileLanguage && fileLanguage !== 'other') {
if (this.props.locale === getLanguage(fileLanguage, 'ISO639_1')) {
if (this.props.locale === LanguageUtils.fromISO639_3(fileLanguage, false)?.ISO639_1) {
return null;
}

return (
<span className="item-type__documentLanguage">
<span>{getLanguage(fileLanguage, 'ISO639_1') || fileLanguage}</span>
<span>{LanguageUtils.fromISO639_3(fileLanguage, false)?.ISO639_1 || fileLanguage}</span>
</span>
);
}
Expand Down
4 changes: 2 additions & 2 deletions app/react/UI/Icon/Icon.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import React from 'react';
import { connect } from 'react-redux';

import { FontAwesomeIcon } from '@fortawesome/react-fontawesome';
import { availableLanguages as languagesList } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { loadIcons } from './library';

loadIcons();

const Icon = ({ locale = '', ...ownProps }) => {
const languageData = languagesList.find(l => l.key === locale);
const languageData = availableLanguages.find(l => l.key === locale);
return (
<FontAwesomeIcon {...ownProps} flip={languageData && languageData.rtl ? 'horizontal' : null} />
);
Expand Down
2 changes: 1 addition & 1 deletion app/react/V2/Components/UI/Sidepanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import React from 'react';
import { Transition } from '@headlessui/react';
import { useParams } from 'react-router-dom';
import { XMarkIcon } from '@heroicons/react/20/solid';
import { availableLanguages } from 'shared/languagesList';
import { availableLanguages } from 'shared/language';
import { Translate } from 'app/I18N';

interface SidePanelProps {
Expand Down
Loading
Loading