Skip to content

Commit

Permalink
fix: SJIP-882 make use of executeSearchAfterQuery for manifests
Browse files Browse the repository at this point in the history
  • Loading branch information
aperron-ferlab committed Jul 3, 2024
1 parent e0d6160 commit 9626a84
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 22 deletions.
22 changes: 21 additions & 1 deletion src/reports/file-manifest/fileManifestStats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
import { Request, Response } from 'express';

import EsInstance from '../../ElasticSearchClientInstance';
import { PROJECT } from '../../env';
import { reportGenerationErrorHandler } from '../../errors';
import { ProjectType } from '../types';
import getFamilyIds from '../utils/getFamilyIds';
import getFilesFromSqon from '../utils/getFilesFromSqon';
import configInclude from './configInclude';
import configKf from './configKf';

interface IFileByDataType {
key: string;
Expand All @@ -21,12 +25,28 @@ const fileManifestStats = async (req: Request, res: Response): Promise<void> =>
const userId = req['kauth']?.grant?.access_token?.content?.sub;
const accessToken = req.headers.authorization;

let reportConfig;
const p = PROJECT.toLowerCase().trim();
if (p === ProjectType.include) {
reportConfig = configInclude;
} else {
reportConfig = configKf;
}

const wantedFields = ['file_id', 'data_type', 'size', 'participants.participant_id'];

const esClient = EsInstance.getInstance();

try {
const files = await getFilesFromSqon(esClient, projectId, sqon, userId, accessToken, wantedFields);
const files = await getFilesFromSqon(
esClient,
reportConfig,
projectId,
sqon,
userId,
accessToken,
wantedFields,
);

const newFiles = withFamily
? await getFamilyIds(
Expand Down
13 changes: 12 additions & 1 deletion src/reports/file-manifest/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,22 @@ const fileManifestReport = async (req: Request, res: Response): Promise<void> =>
const esClient = EsInstance.getInstance();

try {
const files = await getFilesFromSqon(esClient, projectId, sqon, userId, accessToken, wantedFields);
const files = await getFilesFromSqon(
esClient,
reportConfig,
projectId,
sqon,
userId,
accessToken,
wantedFields,
);
const fileIds = files?.map((f) => f.file_id);
console.log('OriginalFileIds: ', fileIds);
const newFileIds = withFamily ? await getFamilyIds(esClient, fileIds) : fileIds;
console.log('New unique file ids: ', newFileIds.length);

const filesInfos = await getInfosByConfig(esClient, reportConfig, newFileIds, 'file_id', esFileIndex);
console.log('filesInfos: ', filesInfos.length);

const path = `/tmp/${filename}.tsv`;
await generateTsvReport(filesInfos, path, reportConfig);
Expand Down
23 changes: 16 additions & 7 deletions src/reports/utils/getFamilyIds.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { Client } from '@elastic/elasticsearch';
import { env } from 'process';

import { ES_QUERY_MAX_SIZE, esFileIndex } from '../../env';
import { executeSearch } from '../../utils/esUtils';
import { executeSearch, executeSearchAfterQuery } from '../../utils/esUtils';

interface IFileInfo {
data_type: string;
Expand All @@ -14,11 +15,18 @@ const getFilesInfo = async (fileIds: string[], es: Client): Promise<IFileInfo[]>
query: { bool: { must: [{ terms: { file_id: fileIds, boost: 0 } }] } },
_source: ['file_id', 'data_type', 'participants.families_id'],
sort: [{ data_type: { order: 'asc' } }],
size: ES_QUERY_MAX_SIZE,
};
const results = await executeSearch(es, esFileIndex, esRequest);
const hits = results?.body?.hits?.hits || [];
const sources = hits.map((hit) => hit._source);
const sources: any[] = [];
await executeSearchAfterQuery(es, esFileIndex, esRequest, {
onPageFetched: (pageHits) => {
sources.push(...pageHits);
},
onFinish: (total) => {
console.log(`Finished fetching all pages in getFilesInfo. Total hits: ${total}`);
},
pageSize: Number(env.ES_PAGESIZE),
});

const filesInfos = [];
sources.forEach((source) => {
source.participants &&
Expand Down Expand Up @@ -83,10 +91,11 @@ const getFilesIdsMatched = async (filesInfos: IFileInfo[], es: Client): Promise<
*/
const getFamilyIds = async (es: Client, fileIds: string[]): Promise<string[]> => {
const filesInfos = await getFilesInfo(fileIds, es);
console.log('filesInfos: ', filesInfos.length);
const filesIdsMatched = await getFilesIdsMatched(filesInfos, es);
const newFileIds = [...new Set([...fileIds, ...filesIdsMatched])];
console.log('filesIdsMatched: ', filesIdsMatched.length);

return newFileIds;
return [...new Set([...fileIds, ...filesIdsMatched])];
};

export default getFamilyIds;
26 changes: 20 additions & 6 deletions src/reports/utils/getFilesFromSqon.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import { buildQuery } from '@arranger/middleware';
import { Client } from '@elastic/elasticsearch';
import { env } from 'process';

import { ES_QUERY_MAX_SIZE, esFileAlias, esFileIndex } from '../../env';
import { esFileAlias, esFileIndex } from '../../env';
import { getExtendedConfigs, getNestedFields } from '../../utils/arrangerUtils';
import { executeSearch } from '../../utils/esUtils';
import { executeSearchAfterQuery } from '../../utils/esUtils';
import { Sqon } from '../../utils/setsTypes';
import { resolveSetsInSqon } from '../../utils/sqonUtils';
import { SheetConfig } from '../types';

/**
* Generate a sqon from the family_id of all the participants in the given `sqon`.
Expand All @@ -19,6 +21,7 @@ import { resolveSetsInSqon } from '../../utils/sqonUtils';
*/
const getFilesFromSqon = async (
es: Client,
reportConfig: SheetConfig,
projectId: string,
sqon: Sqon,
userId: string,
Expand All @@ -29,10 +32,21 @@ const getFilesFromSqon = async (
const nestedFields = getNestedFields(extendedConfig);
const newSqon = await resolveSetsInSqon(sqon, userId, accessToken);
const query = buildQuery({ nestedFields, filters: newSqon });
const results = await executeSearch(es, esFileIndex, { query, size: ES_QUERY_MAX_SIZE, _source: fieldsWanted });
const hits = results?.body?.hits?.hits || [];
const sources = hits.map((hit) => hit._source);
return sources;
const esQuery = { query, _source: fieldsWanted, sort: reportConfig.sort };

const allHits: any[] = [];

await executeSearchAfterQuery(es, esFileIndex, esQuery, {
onPageFetched: (pageHits) => {
allHits.push(...pageHits);
},
onFinish: (total) => {
console.log(`Finished fetching all pages in getFilesFromSqon. Total hits: ${total}`);
},
pageSize: Number(env.ES_PAGESIZE),
});

return allHits;
};

export default getFilesFromSqon;
21 changes: 14 additions & 7 deletions src/reports/utils/getInfosByConfig.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { Client } from '@elastic/elasticsearch';
import get from 'lodash/get';
import { env } from 'process';

import { ES_QUERY_MAX_SIZE } from '../../env';
import { executeSearch } from '../../utils/esUtils';
import { executeSearchAfterQuery } from '../../utils/esUtils';
import { SheetConfig } from '../types';

/** this recursive func allow you to find nested array values */
Expand All @@ -28,12 +28,19 @@ const getInfosByConfig = async (
query: { bool: { must: [{ terms: { [idField]: ids, boost: 0 } }] } },
_source: [...config.columns.map((e) => e.field), ...(extraFields || [])],
sort: config.sort,
size: ES_QUERY_MAX_SIZE,
};
const results = await executeSearch(es, esIndex, esRequest);
const hits = results?.body?.hits?.hits || [];
const sources = hits.map((hit) => hit._source);
return sources.map((source) =>
const sources: any[] = [];
await executeSearchAfterQuery(es, esIndex, esRequest, {
onPageFetched: (pageHits) => {
sources.push(...pageHits);
},
onFinish: (total) => {
console.log(`Finished fetching all pages in getInfosByConfig. Total hits: ${total}`);
},
pageSize: Number(env.ES_PAGESIZE),
});

return (sources as any).map((source) =>
config.columns.reduce((data, column) => {
const field = column.field;
/** default case example: field = 'file_id' */
Expand Down

0 comments on commit 9626a84

Please sign in to comment.