Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: SJIP-882 make use of executeSearchAfterQuery for manifests #123

Merged
merged 3 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import { buildQuery } from '@arranger/middleware';
import { Client } from '@elastic/elasticsearch';
import xl from 'excel4node';
import noop from 'lodash/noop';
import { env } from 'process';

import { ES_PAGESIZE } from '../../env';
import { getExtendedConfigs, getNestedFields } from '../../utils/arrangerUtils';
import { executeSearchAfterQuery } from '../../utils/esUtils';
import ExtendedReportConfigs from '../../utils/extendedReportConfigs';
Expand All @@ -15,7 +15,8 @@ import generateTxtFile from '../utils/generateTxtFile';
import { addConditionAvailableInSqon } from '../utils/getAvailableBiospecimensFromSqon';

// eslint-disable-next-line max-len
const cbtn_instructions_mock = 'To request biospecimens from CBTN, please use the request form (https://airtable.com/apperYvVD82ti3021/pagdArwI0TxJQpiVW/form). General inquiries can be directed to research@cbtn.org.';
const cbtn_instructions_mock =
'To request biospecimens from CBTN, please use the request form (https://airtable.com/apperYvVD82ti3021/pagdArwI0TxJQpiVW/form). General inquiries can be directed to research@cbtn.org.';

/**
* Generate and write locally.
Expand Down Expand Up @@ -90,7 +91,7 @@ export default async function generateFiles(
}
},
onFinish: noop,
pageSize: Number(env.ES_PAGESIZE),
pageSize: ES_PAGESIZE,
});
} catch (err) {
console.error(`Error while fetching the data for biospecimen request`);
Expand Down
22 changes: 21 additions & 1 deletion src/reports/file-manifest/fileManifestStats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
import { Request, Response } from 'express';

import EsInstance from '../../ElasticSearchClientInstance';
import { PROJECT } from '../../env';
import { reportGenerationErrorHandler } from '../../errors';
import { ProjectType } from '../types';
import getFamilyIds from '../utils/getFamilyIds';
import getFilesFromSqon from '../utils/getFilesFromSqon';
import configInclude from './configInclude';
import configKf from './configKf';

interface IFileByDataType {
key: string;
Expand All @@ -21,12 +25,28 @@ const fileManifestStats = async (req: Request, res: Response): Promise<void> =>
const userId = req['kauth']?.grant?.access_token?.content?.sub;
const accessToken = req.headers.authorization;

let reportConfig;
const p = PROJECT.toLowerCase().trim();
if (p === ProjectType.include) {
reportConfig = configInclude;
} else {
reportConfig = configKf;
}

const wantedFields = ['file_id', 'data_type', 'size', 'participants.participant_id'];

const esClient = EsInstance.getInstance();

try {
const files = await getFilesFromSqon(esClient, projectId, sqon, userId, accessToken, wantedFields);
const files = await getFilesFromSqon(
esClient,
reportConfig,
projectId,
sqon,
userId,
accessToken,
wantedFields,
);

const newFiles = withFamily
? await getFamilyIds(
Expand Down
13 changes: 10 additions & 3 deletions src/reports/file-manifest/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,21 @@ const fileManifestReport = async (req: Request, res: Response): Promise<void> =>
const esClient = EsInstance.getInstance();

try {
const files = await getFilesFromSqon(esClient, projectId, sqon, userId, accessToken, wantedFields);
const files = await getFilesFromSqon(
esClient,
reportConfig,
projectId,
sqon,
userId,
accessToken,
wantedFields,
);
const fileIds = files?.map((f) => f.file_id);
const newFileIds = withFamily ? await getFamilyIds(esClient, fileIds) : fileIds;

const filesInfos = await getInfosByConfig(esClient, reportConfig, newFileIds, 'file_id', esFileIndex);

const path = `/tmp/${filename}.tsv`;
await generateTsvReport(filesInfos, path, reportConfig);
generateTsvReport(filesInfos, path, reportConfig);

res.setHeader('Content-Disposition', `attachment; filename="${filename}.tsv"`);
res.sendFile(path);
Expand Down
7 changes: 4 additions & 3 deletions src/reports/generateReport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import { Client } from '@elastic/elasticsearch';
import xl from 'excel4node';
import { Response } from 'express';
import flattenDeep from 'lodash/flattenDeep';
import noop from 'lodash/noop';
import uniq from 'lodash/uniq';

import * as env from '../env';
import { ES_PAGESIZE } from '../env';
import {
findValueInField,
generateColumnsForProperty,
Expand Down Expand Up @@ -149,8 +150,8 @@ export default async function generateReport(
wrapper.rowIndex += 1;
});
},
onFinish: () => {},
pageSize: env.ES_PAGESIZE,
onFinish: noop,
pageSize: ES_PAGESIZE,
});
console.timeEnd(`executeSearchAfterQuery ${sheetConfig.sheetName}`);
} catch (err) {
Expand Down
7 changes: 4 additions & 3 deletions src/reports/utils/generateExcelReport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ import { Client } from '@elastic/elasticsearch';
import xl from 'excel4node';
import { Response } from 'express';
import flattenDeep from 'lodash/flattenDeep';
import noop from 'lodash/noop';
import uniq from 'lodash/uniq';

import * as env from '../../env';
import { ES_PAGESIZE } from '../../env';
import {
findValueInField,
generateColumnsForProperty,
Expand Down Expand Up @@ -148,8 +149,8 @@ export default async function generateExcelReport(
wrapper.rowIndex += 1;
});
},
onFinish: () => {},
pageSize: env.ES_PAGESIZE,
onFinish: noop,
pageSize: ES_PAGESIZE,
});
console.timeEnd(`executeSearchAfterQuery ${sheetConfig.sheetName}`);
} catch (err) {
Expand Down
4 changes: 2 additions & 2 deletions src/reports/utils/generateTsvReport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ import fs from 'fs';

import { SheetConfig } from '../types';

const generateTsvReport = async (data: { key: string }[], path: string, config: SheetConfig): Promise<void> => {
const generateTsvReport = (data: { key: string }[], path: string, config: SheetConfig): void => {
let tsvContent = config.columns.map((c) => c.header).join('\t') + '\n';

for (const row of data) {
const values = config.columns.map((c) => row[`${c.field}${c.fieldExtraSuffix || ''}`]);
tsvContent += values.join('\t') + '\n';
}

await fs.writeFileSync(path, tsvContent);
fs.writeFileSync(path, tsvContent);
};

export default generateTsvReport;
21 changes: 13 additions & 8 deletions src/reports/utils/getFamilyIds.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { Client } from '@elastic/elasticsearch';
import noop from 'lodash/noop';

import { ES_QUERY_MAX_SIZE, esFileIndex } from '../../env';
import { executeSearch } from '../../utils/esUtils';
import { ES_PAGESIZE, ES_QUERY_MAX_SIZE, esFileIndex } from '../../env';
import { executeSearch, executeSearchAfterQuery } from '../../utils/esUtils';

interface IFileInfo {
data_type: string;
Expand All @@ -14,11 +15,16 @@ const getFilesInfo = async (fileIds: string[], es: Client): Promise<IFileInfo[]>
query: { bool: { must: [{ terms: { file_id: fileIds, boost: 0 } }] } },
_source: ['file_id', 'data_type', 'participants.families_id'],
sort: [{ data_type: { order: 'asc' } }],
size: ES_QUERY_MAX_SIZE,
};
const results = await executeSearch(es, esFileIndex, esRequest);
const hits = results?.body?.hits?.hits || [];
const sources = hits.map((hit) => hit._source);
const sources: any[] = [];
await executeSearchAfterQuery(es, esFileIndex, esRequest, {
onPageFetched: (pageHits) => {
sources.push(...pageHits);
},
onFinish: noop,
pageSize: ES_PAGESIZE,
});

const filesInfos = [];
sources.forEach((source) => {
source.participants &&
Expand Down Expand Up @@ -84,9 +90,8 @@ const getFilesIdsMatched = async (filesInfos: IFileInfo[], es: Client): Promise<
const getFamilyIds = async (es: Client, fileIds: string[]): Promise<string[]> => {
const filesInfos = await getFilesInfo(fileIds, es);
const filesIdsMatched = await getFilesIdsMatched(filesInfos, es);
const newFileIds = [...new Set([...fileIds, ...filesIdsMatched])];

return newFileIds;
return [...new Set([...fileIds, ...filesIdsMatched])];
};

export default getFamilyIds;
24 changes: 18 additions & 6 deletions src/reports/utils/getFilesFromSqon.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import { buildQuery } from '@arranger/middleware';
import { Client } from '@elastic/elasticsearch';
import noop from 'lodash/noop';

import { ES_QUERY_MAX_SIZE, esFileAlias, esFileIndex } from '../../env';
import { ES_PAGESIZE, esFileAlias, esFileIndex } from '../../env';
import { getExtendedConfigs, getNestedFields } from '../../utils/arrangerUtils';
import { executeSearch } from '../../utils/esUtils';
import { executeSearchAfterQuery } from '../../utils/esUtils';
import { Sqon } from '../../utils/setsTypes';
import { resolveSetsInSqon } from '../../utils/sqonUtils';
import { SheetConfig } from '../types';

/**
* Generate a sqon from the family_id of all the participants in the given `sqon`.
Expand All @@ -19,6 +21,7 @@ import { resolveSetsInSqon } from '../../utils/sqonUtils';
*/
const getFilesFromSqon = async (
es: Client,
reportConfig: SheetConfig,
projectId: string,
sqon: Sqon,
userId: string,
Expand All @@ -29,10 +32,19 @@ const getFilesFromSqon = async (
const nestedFields = getNestedFields(extendedConfig);
const newSqon = await resolveSetsInSqon(sqon, userId, accessToken);
const query = buildQuery({ nestedFields, filters: newSqon });
const results = await executeSearch(es, esFileIndex, { query, size: ES_QUERY_MAX_SIZE, _source: fieldsWanted });
const hits = results?.body?.hits?.hits || [];
const sources = hits.map((hit) => hit._source);
return sources;
const esQuery = { query, _source: fieldsWanted, sort: reportConfig.sort };

const results: any[] = [];

await executeSearchAfterQuery(es, esFileIndex, esQuery, {
onPageFetched: (pageHits) => {
results.push(...pageHits);
},
onFinish: noop,
pageSize: ES_PAGESIZE,
});

return results;
};

export default getFilesFromSqon;
20 changes: 13 additions & 7 deletions src/reports/utils/getInfosByConfig.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { Client } from '@elastic/elasticsearch';
import get from 'lodash/get';
import noop from 'lodash/noop';

import { ES_QUERY_MAX_SIZE } from '../../env';
import { executeSearch } from '../../utils/esUtils';
import { ES_PAGESIZE } from '../../env';
import { executeSearchAfterQuery } from '../../utils/esUtils';
import { SheetConfig } from '../types';

/** this recursive func allow you to find nested array values */
Expand All @@ -28,12 +29,17 @@ const getInfosByConfig = async (
query: { bool: { must: [{ terms: { [idField]: ids, boost: 0 } }] } },
_source: [...config.columns.map((e) => e.field), ...(extraFields || [])],
sort: config.sort,
size: ES_QUERY_MAX_SIZE,
};
const results = await executeSearch(es, esIndex, esRequest);
const hits = results?.body?.hits?.hits || [];
const sources = hits.map((hit) => hit._source);
return sources.map((source) =>
const sources: any[] = [];
await executeSearchAfterQuery(es, esIndex, esRequest, {
onPageFetched: (pageHits) => {
sources.push(...pageHits);
},
onFinish: noop,
pageSize: ES_PAGESIZE,
});

return (sources as any).map((source) =>
config.columns.reduce((data, column) => {
const field = column.field;
/** default case example: field = 'file_id' */
Expand Down
Loading