From 378a8247b406e19c99796d22f3db1790b2e33c88 Mon Sep 17 00:00:00 2001
From: jim-sheldon
Date: Tue, 10 May 2022 18:21:36 -0400
Subject: [PATCH] Hashing sourceEntryIds

---
 .../data-service/src/controllers/case.ts      | 50 +++++++++-------
 .../src/controllers/preprocessor.ts           | 19 ++++--
 data-serving/data-service/src/util/case.ts    | 14 +++++
 .../data-service/src/util/validate-env.ts     |  5 ++
 .../test/controllers/case.test.ts             | 58 ++++++++++++++-----
 .../test/controllers/preprocessor.test.ts     | 31 ++++++++--
 data-serving/data-service/tsconfig.json       |  2 +-
 .../scripts/setup-db/migrate-mongo-config.js  |  5 +-
 .../20220510211614-hash-source-entry-ids.js   | 24 ++++++++
 9 files changed, 161 insertions(+), 47 deletions(-)
 create mode 100644 data-serving/scripts/setup-db/migrations/20220510211614-hash-source-entry-ids.js

diff --git a/data-serving/data-service/src/controllers/case.ts b/data-serving/data-service/src/controllers/case.ts
index 50b6ec89c..f71f56a98 100644
--- a/data-serving/data-service/src/controllers/case.ts
+++ b/data-serving/data-service/src/controllers/case.ts
@@ -1,3 +1,4 @@
+import { AgeBucket } from '../model/age-bucket';
 import {
     Case,
     caseAgeRange,
@@ -26,12 +27,12 @@ import {
     denormalizeEventsHeaders,
     parseDownloadedCase,
     removeBlankHeader,
+    hashAndSalt,
 } from '../util/case';
-
 import { logger } from '../util/logger';
+
 import stringify from 'csv-stringify/lib/sync';
 import _ from 'lodash';
-import { AgeBucket } from '../model/age-bucket';
 
 class GeocodeNotFoundError extends Error {}
 
@@ -41,6 +42,13 @@ type BatchValidationErrors = { index: number; message: string }[];
 
 const caseFromDTO = async (receivedCase: CaseDTO) => {
     const aCase = (receivedCase as unknown) as LeanDocument<CaseDocument>;
+    if (
+        receivedCase.caseReference?.sourceId &&
+        receivedCase.caseReference?.sourceEntryId
+    ) {
+        const hashedId = hashAndSalt(receivedCase.caseReference.sourceEntryId);
+        aCase.caseReference.sourceEntryId = hashedId;
+    }
     if (receivedCase.demographics?.ageRange) {
         // won't be many age buckets, so fetch all of them.
         const allBuckets = await AgeBucket.find({});
@@ -295,14 +303,14 @@ export class CasesController {
      * Handles HTTP GET /api/cases.
      */
    list = async (req: Request, res: Response): Promise<void> => {
-        logger.info('List method entrypoint');
+        logger.debug('List method entrypoint');
        const page = Number(req.query.page) || 1;
        const limit = Number(req.query.limit) || 10;
        const countLimit = Number(req.query.count_limit) || 10000;
        const sortBy = String(req.query.sort_by) || 'default';
        const sortByOrder = String(req.query.order) || 'ascending';
-        logger.info('Got query params');
+        logger.debug('Got query params');
        if (page < 1) {
            res.status(422).json({ message: 'page must be > 0' });
            return;
        }
@@ -321,7 +329,7 @@
            return;
        }
-        logger.info('Got past 422s');
+        logger.debug('Got past 422s');
        try {
            const casesQuery = casesMatchingSearchQuery({
                searchQuery: req.query.q || '',
@@ -348,7 +356,7 @@
            ]);
            const dtos = await Promise.all(docs.map(dtoFromCase));
-            logger.info('got results');
+            logger.debug('got results');
            // total is actually stored in a count index in mongo, so the query is fast.
            // however to maintain existing behaviour, only return the count limit
            const reportedTotal = Math.min(total, countLimit);
@@ -360,11 +368,11 @@
                    nextPage: page + 1,
                    total: reportedTotal,
                });
-                logger.info('Got multiple pages of results');
+                logger.debug('Got multiple pages of results');
                return;
            }
            // If we fetched all available data, just return it.
-            logger.info('Got one page of results');
+            logger.debug('Got one page of results');
            res.json({ cases: dtos, total: reportedTotal });
        } catch (e) {
            if (e instanceof ParsingError) {
@@ -532,10 +540,8 @@
    batchUpsert = async (req: Request, res: Response): Promise<void> => {
        try {
            // Batch validate cases first.
-            logger.info('batchUpsert: entrypoint');
            const cases = req.body.cases;
            const errors = await this.batchValidate(cases);
-            logger.info('batchUpsert: validated cases');
            if (errors.length > 0) {
                // drop any invalid cases but don't give up yet: upsert the remainder
                const badCases = _.orderBy(errors, 'index', 'desc').map(
@@ -548,12 +554,10 @@
                    `batchUpsert: dropped ${errors.length} invalid cases`,
                );
            }
-            logger.info('batchUpsert: splitting cases by sourceID');
            const {
                unrestrictedCases,
                restrictedCases,
            } = this.filterCasesBySourceRestricted(cases);
-            logger.info('batchUpsert: preparing bulk write');
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const upsertLambda = async (c: any) => {
                delete c.caseCount;
@@ -592,7 +596,6 @@
                await Promise.all(restrictedCases.map(upsertLambda)),
                { ordered: false },
            );
-            logger.info('batchUpsert: finished bulk write');
            const status = errors.length > 0 ? 207 : 200;
            res.status(status).json({
                phase: 'UPSERT',
@@ -610,11 +613,11 @@
        } catch (e) {
            const err = e as Error;
            if (err.name === 'ValidationError') {
-                logger.error(err);
+                logger.error(`Validation error in batch upsert: ${err}`);
                res.status(422).json(err);
                return;
            }
-            logger.error(err);
+            logger.error(`Other error in batch upsert: ${err}`);
            res.status(500).json(err);
            return;
        }
@@ -646,6 +649,7 @@
                res.status(422).json(err);
                return;
            }
+            logger.error(`Error in update: ${err.message}`);
            res.status(500).json(err);
            return;
        }
@@ -725,17 +729,21 @@
     * Handles HTTP PUT /api/cases.
     */
    upsert = async (req: Request, res: Response): Promise<void> => {
+        logger.info('Upsert entry');
        try {
+            let hashedId = '';
+            if (req.body.caseReference?.sourceId &&
+                req.body.caseReference?.sourceEntryId) {
+                hashedId = hashAndSalt(req.body.caseReference.sourceEntryId);
+            }
            let c = await Case.findOne({
                'caseReference.sourceId': req.body.caseReference?.sourceId,
-                'caseReference.sourceEntryId':
-                    req.body.caseReference?.sourceEntryId,
+                'caseReference.sourceEntryId': hashedId,
            });
            if (!c) {
                c = await RestrictedCase.findOne({
                    'caseReference.sourceId': req.body.caseReference?.sourceId,
-                    'caseReference.sourceEntryId':
-                        req.body.caseReference?.sourceEntryId,
+                    'caseReference.sourceEntryId': hashedId,
                });
            }
            if (
@@ -766,9 +774,11 @@
                err.name === 'ValidationError' ||
                err instanceof InvalidParamError
            ) {
+                logger.error(`ValidationError in upsert: ${err.message}`);
                res.status(422).json(err.message);
                return;
            }
+            logger.error(`Internal server error in upsert: ${err.message}`);
            res.status(500).json(err.message);
            return;
        }
@@ -1213,7 +1223,7 @@ export const findCasesWithCaseReferenceData = async (
        .map((c: any) => {
            return {
                'caseReference.sourceId': c.caseReference.sourceId,
-                'caseReference.sourceEntryId': c.caseReference.sourceEntryId,
+                'caseReference.sourceEntryId': hashAndSalt(c.caseReference.sourceEntryId),
            };
        });

diff --git a/data-serving/data-service/src/controllers/preprocessor.ts b/data-serving/data-service/src/controllers/preprocessor.ts
index 903cf746e..64e0c2d8e 100644
--- a/data-serving/data-service/src/controllers/preprocessor.ts
+++ b/data-serving/data-service/src/controllers/preprocessor.ts
@@ -1,11 +1,12 @@
 import { Case, CaseDocument } from '../model/case';
-import { NextFunction, Request, Response } from 'express';
+import { CaseRevision } from '../model/case-revision';
 import {
     casesMatchingSearchQuery,
     findCasesWithCaseReferenceData,
 } from './case';
+import { hashAndSalt } from '../util/case';
 
-import { CaseRevision } from '../model/case-revision';
+import { NextFunction, Request, Response } from 'express';
 import { DocumentQuery } from 'mongoose';
 import _ from 'lodash';
 import { nextTick } from 'process';
@@ -58,9 +59,11 @@ export const getCase = async (
        // Upsert.
        // TODO: Upserts should only generate update metadata if there is a
        // diff with what's already in the database.
+
+        const hashedId = hashAndSalt(caseReference.sourceEntryId);
        return Case.findOne({
            'caseReference.sourceId': caseReference.sourceId,
-            'caseReference.sourceEntryId': caseReference.sourceEntryId,
+            'caseReference.sourceEntryId': hashedId,
        });
    }

@@ -109,7 +112,7 @@ export const batchUpsertDropUnchangedCases = async (
        const c = request.body.cases[i];
        if (c.caseReference?.sourceId && c.caseReference?.sourceEntryId) {
            const existingCase = existingCasesByCaseRefCombo.get(
-                c.caseReference.sourceId + ':' + c.caseReference.sourceEntryId,
+                c.caseReference.sourceId + ':' + hashAndSalt(c.caseReference.sourceEntryId),
            );
            if (existingCase !== undefined && existingCase.equalsJSON(c)) {
                request.body.cases.splice(i, 1);
@@ -151,11 +154,15 @@ export const setBatchUpsertFields = async (
    request.body.cases.forEach((c: any) => {
        // Set the request cases' revision metadata to the update metadata, if
        // present, or create metadata otherwise.
+        let hashedId: string | undefined;
+        if (c.caseReference?.sourceEntryId) {
+            hashedId = hashAndSalt(c.caseReference.sourceEntryId);
+        }
        c.revisionMetadata =
            metadataMap.get(
                c.caseReference?.sourceId +
                    ':' +
-                    c.caseReference?.sourceEntryId,
+                    hashedId,
            ) || createNewMetadata(curatorEmail);

        // If case is present, add uploadIds to existing list of uploadIds
@@ -165,7 +172,7 @@ export const setBatchUpsertFields = async (
            c.caseReference?.sourceEntryId
        ) {
            const existingCaseUploadIds = existingCasesByCaseRefCombo.get(
-                c.caseReference.sourceId + ':' + c.caseReference.sourceEntryId,
+                c.caseReference.sourceId + ':' + hashAndSalt(c.caseReference.sourceEntryId),
            )?.caseReference?.uploadIds;
            if (existingCaseUploadIds) {
                c.caseReference.uploadIds = _.union(

diff --git a/data-serving/data-service/src/util/case.ts b/data-serving/data-service/src/util/case.ts
index 49acc2dad..c4dd4024d 100644
--- a/data-serving/data-service/src/util/case.ts
+++ b/data-serving/data-service/src/util/case.ts
@@ -12,6 +12,9 @@ import { TravelHistoryDocument } from '../model/travel-history';
 import { VaccineDocument } from '../model/vaccine';
 import { VariantDocument } from '../model/variant';
 
+import validateEnv from './validate-env';
+
+import { createHash } from 'crypto';
 import _ from 'lodash';
 
 const validEvents = [
@@ -29,6 +32,8 @@ const dateOnlyEvents = [
     'selfIsolation',
 ];
 
+const env = validateEnv();
+
 /**
  * Converts event list to object to make a column for every event in csv file.
  *
@@ -92,6 +97,15 @@ export const parseDownloadedCase = (caseDocument: CaseDTO) => {
     };
 };
 
+/**
+ * Hashes and salts a string. Used for sourceEntryIds.
+ */
+export const hashAndSalt = (rawString: string) => {
+    // FIXME: hash secret
+    // FIXME: salt
+    return createHash('sha256').update(rawString + env.SALT).digest('hex');
+};
+
 /**
  * Enum with possible sortBy keywords
  */

diff --git a/data-serving/data-service/src/util/validate-env.ts b/data-serving/data-service/src/util/validate-env.ts
index ef3cc7b6f..9247a2e4a 100644
--- a/data-serving/data-service/src/util/validate-env.ts
+++ b/data-serving/data-service/src/util/validate-env.ts
@@ -5,6 +5,7 @@ export default function validateEnv(): Readonly<{
     DB_CONNECTION_STRING: string;
     PORT: number;
     SERVICE_ENV: string;
+    SALT: string;
 }> & {
     readonly [varName: string]: string | boolean | number | undefined;
     // eslint-disable-next-line indent
@@ -24,5 +25,9 @@ export default function validateEnv(): Readonly<{
             desc: 'Environment in which the service is running',
             devDefault: 'local',
         }),
+        SALT: str({
+            desc: 'Additional string to append before hashing',
+            devDefault: 'salt',
+        }),
     });
 }

diff --git a/data-serving/data-service/test/controllers/case.test.ts b/data-serving/data-service/test/controllers/case.test.ts
index e4d8d865d..9ddde9098 100644
--- a/data-serving/data-service/test/controllers/case.test.ts
+++ b/data-serving/data-service/test/controllers/case.test.ts
@@ -4,6 +4,7 @@ import { Demographics } from '../../src/model/demographics';
 import { MongoMemoryServer } from 'mongodb-memory-server';
 import { Source } from '../../src/model/source';
 import { PathogenDocument } from '../../src/model/pathogen';
+import { hashAndSalt } from '../../src/util/case';
 import app from './../../src/index';
 import fullCase from './../model/data/case.full.json';
 import minimalCase from './../model/data/case.minimal.json';
@@ -586,19 +587,28 @@ describe('POST', () => {
    it('batch upsert with only valid cases should return 200 with counts', async () => {
        const newCaseWithoutEntryId = new Case(minimalCase);
        const newCaseWithEntryId = new Case(fullCase);
-        newCaseWithEntryId.caseReference.sourceEntryId = 'newId';
+
+        const newCaseSourceEntryId = 'newId';
+        newCaseWithEntryId.caseReference.sourceEntryId = hashAndSalt(newCaseSourceEntryId);

        const changedCaseWithEntryId = new Case(fullCase);
+        const changedCaseSourceEntryId = 'changedEntryId';
+        changedCaseWithEntryId.caseReference.sourceEntryId = hashAndSalt(changedCaseSourceEntryId);
        await changedCaseWithEntryId.save();
        changedCaseWithEntryId.pathogens = [
            { id: '304', name: 'Pneumonia' } as PathogenDocument,
        ];

        const unchangedCaseWithEntryId = new Case(fullCase);
-        unchangedCaseWithEntryId.caseReference.sourceEntryId =
-            'unchangedEntryId';
+        const unchangedCaseSourceEntryId = 'unchangedEntryId';
+        const hashedUnchangedCaseSEId = hashAndSalt(unchangedCaseSourceEntryId);
+        unchangedCaseWithEntryId.caseReference.sourceEntryId = hashedUnchangedCaseSEId;
+
        await unchangedCaseWithEntryId.save();
+
+        unchangedCaseWithEntryId.caseReference.sourceEntryId = unchangedCaseSourceEntryId;
+        changedCaseWithEntryId.caseReference.sourceEntryId = changedCaseSourceEntryId;

        const res = await request(app)
            .post('/api/cases/batchUpsert')
            .send({
@@ -615,10 +625,13 @@
        const unchangedDbCase = await Case.findById(
            unchangedCaseWithEntryId._id,
        );
+        unchangedCaseWithEntryId.caseReference.sourceEntryId = hashedUnchangedCaseSEId;
        expect(unchangedDbCase?.toJSON()).toEqual(
            unchangedCaseWithEntryId.toJSON(),
        );
+
        expect(res.body.numCreated).toBe(2); // Both new cases were created.
+
        expect(res.body.numUpdated).toBe(1); // Only changed case was updated.
        const updatedCaseInDb = await Case.findById(changedCaseWithEntryId._id);
@@ -661,10 +674,13 @@
        const newCaseWithoutEntryId = new Case(minimalCase);
        newCaseWithoutEntryId.caseReference.uploadIds = newUploadIds;
        const newCaseWithEntryId = new Case(fullCase);
-        newCaseWithEntryId.caseReference.sourceEntryId = 'newId';
+        const newCaseSourceEntryId = 'newId';
+        newCaseWithEntryId.caseReference.sourceEntryId = hashAndSalt(newCaseSourceEntryId);
        newCaseWithEntryId.caseReference.uploadIds = newUploadIds;

        const changedCaseWithEntryId = new Case(fullCase);
+        const changedCaseSourceEntryId = 'changedEntryId';
+        changedCaseWithEntryId.caseReference.sourceEntryId = hashAndSalt(changedCaseSourceEntryId);
        await changedCaseWithEntryId.save();
        changedCaseWithEntryId.caseReference.uploadIds = newUploadIds;
        changedCaseWithEntryId.pathogens = [
@@ -672,13 +688,17 @@
        ];

        const unchangedCaseWithEntryId = new Case(fullCase);
-        unchangedCaseWithEntryId.caseReference.sourceEntryId =
-            'unchangedEntryId';
+        const unchangedCaseSourceEntryId = 'unchangedEntryId';
+        unchangedCaseWithEntryId.caseReference.sourceEntryId = hashAndSalt(unchangedCaseSourceEntryId);
        const unchangedCaseUploadIds =
            unchangedCaseWithEntryId.caseReference.uploadIds;
        await unchangedCaseWithEntryId.save();
        unchangedCaseWithEntryId.caseReference.uploadIds = newUploadIds;
+
+        newCaseWithEntryId.caseReference.sourceEntryId = newCaseSourceEntryId;
+        unchangedCaseWithEntryId.caseReference.sourceEntryId = unchangedCaseSourceEntryId;
+        changedCaseWithEntryId.caseReference.sourceEntryId = changedCaseSourceEntryId;

        const res = await request(app)
            .post('/api/cases/batchUpsert')
            .send({
@@ -802,11 +822,14 @@
    });
    it('batch upsert should result in create and update metadata', async () => {
        const existingCase = new Case(fullCase);
+        const existingCaseSourceEntryId =
+            existingCase.caseReference.sourceEntryId;
+        existingCase.caseReference.sourceEntryId = hashAndSalt(existingCaseSourceEntryId);
        await existingCase.save();
        existingCase.pathogens = [
            { id: '104', name: 'Pneumonia' } as PathogenDocument,
        ];

+        existingCase.caseReference.sourceEntryId = existingCaseSourceEntryId;
        const res = await request(app)
            .post('/api/cases/batchUpsert')
            .send({
@@ -835,11 +858,14 @@
    });
    it('batch upsert should result in case revisions of existing cases', async () => {
        const existingCase = new Case(fullCase);
+        const existingCaseSourceEntryId = existingCase.caseReference.sourceEntryId;
+        existingCase.caseReference.sourceEntryId = hashAndSalt(existingCaseSourceEntryId);
        await existingCase.save();
        existingCase.pathogens = [
            { id: '104', name: 'Pneumonia' } as PathogenDocument,
        ];

+        existingCase.caseReference.sourceEntryId = existingCaseSourceEntryId;
        const res = await request(app)
            .post('/api/cases/batchUpsert')
            .send({
@@ -851,8 +877,11 @@
    });
    it('batch upsert for unchanged case skips creating metadata and revision', async () => {
        const existingCase = new Case(fullCase);
+        const existingCaseSourceEntryId = existingCase.caseReference.sourceEntryId;
+        existingCase.caseReference.sourceEntryId = hashAndSalt(existingCaseSourceEntryId);
        await existingCase.save();

+        existingCase.caseReference.sourceEntryId = existingCaseSourceEntryId;
        await request(app)
            .post('/api/cases/batchUpsert')
            .send({
@@ -1354,10 +1383,13 @@
    it('should return empty array when no cases match', async () => {
        const existingCase = new Case(fullCase);
+        const existingCaseSourceEntryId = existingCase.caseReference.sourceEntryId;
+        existingCase.caseReference.sourceEntryId = hashAndSalt(existingCaseSourceEntryId);
        await existingCase.save();

        const caseSourceId = existingCase.caseReference.sourceId;
+        // FIXME: the sourceEntryId gets hashed again?!
        const res = await request(app)
            .get(`/api/excludedCaseIds?sourceId=${caseSourceId}`)
            .expect(200);
@@ -1455,7 +1487,7 @@
        const case2 = new Case(fullCase);
        case2.caseReference.verificationStatus = 'EXCLUDED';
-        case2.caseReference.sourceEntryId = 'entry2';
+        case2.caseReference.sourceEntryId = 'entry2';
        await case2.save();

        const case3 = new Case(fullCase);
@@ -1759,7 +1791,7 @@ describe('PUT', () => {
        const sourceId = '5ea86423bae6982635d2e1f8';
        const entryId = 'def456';
        c.set('caseReference.sourceId', sourceId);
-        c.set('caseReference.sourceEntryId', entryId);
+        c.set('caseReference.sourceEntryId', hashAndSalt(entryId));
        await c.save();

        const newNotes = 'abc';
@@ -1785,7 +1817,7 @@
        const sourceId = '5ea86423bae6982635d2e1f8';
        const entryId = 'def456';
        c.set('caseReference.sourceId', sourceId);
-        c.set('caseReference.sourceEntryId', entryId);
+        c.set('caseReference.sourceEntryId', hashAndSalt(entryId));
        await c.save();

        const ageRange = {
@@ -1816,7 +1848,7 @@
        const sourceId = '5ea86423bae6982635d2e1f8';
        const entryId = 'def456';
        c.set('caseReference.sourceId', sourceId);
-        c.set('caseReference.sourceEntryId', entryId);
+        c.set('caseReference.sourceEntryId', hashAndSalt(entryId));
        await c.save();

        const newNotes = 'abc';
@@ -1848,7 +1880,7 @@
        const sourceId = '5ea86423bae6982635d2e1f8';
        const entryId = 'def456';
        c.set('caseReference.sourceId', sourceId);
-        c.set('caseReference.sourceEntryId', entryId);
+        c.set('caseReference.sourceEntryId', hashAndSalt(entryId));
        await c.save();

        const newNotes = 'abc';
@@ -1872,7 +1904,7 @@
        const sourceId = '5ea86423bae6982635d2e1f8';
        const entryId = 'def456';
        c.set('caseReference.sourceId', sourceId);
-        c.set('caseReference.sourceEntryId', entryId);
+        c.set('caseReference.sourceEntryId', hashAndSalt(entryId));
        await c.save();

        const newNotes = 'abc';
@@ -1948,7 +1980,7 @@
        const sourceId = '5ea86423bae6982635d2e1f8';
        const entryId = 'def456';
        c.set('caseReference.sourceId', sourceId);
-        c.set('caseReference.sourceEntryId', entryId);
+        c.set('caseReference.sourceEntryId', hashAndSalt(entryId));
        await c.save();

        return request(app)

diff --git a/data-serving/data-service/test/controllers/preprocessor.test.ts b/data-serving/data-service/test/controllers/preprocessor.test.ts
index 429488370..4d1ca32ee 100644
--- a/data-serving/data-service/test/controllers/preprocessor.test.ts
+++ b/data-serving/data-service/test/controllers/preprocessor.test.ts
@@ -14,6 +14,7 @@ import {
 import { Case } from '../../src/model/case';
 import { CaseRevision } from '../../src/model/case-revision';
 import { Demographics } from '../../src/model/demographics';
+import { hashAndSalt } from '../../src/util/case';
 import { MongoMemoryServer } from 'mongodb-memory-server';
 import app from '../../src/index';
 import minimalCase from './../model/data/case.minimal.json';
@@ -245,11 +246,12 @@ describe('upsert', () => {
        });
    });
    it('with existing case sets update metadata', async () => {
+        const sourceEntryId = 'case_id';
        const upsertCase = {
            ...minimalCase,
            caseReference: {
                ...minimalCase.caseReference,
-                sourceEntryId: 'case_id',
+                sourceEntryId: hashAndSalt(sourceEntryId),
            },
        };
        const c = new Case({
@@ -264,6 +266,8 @@
        });
        await c.save();

+        upsertCase.caseReference.sourceEntryId = sourceEntryId;
+
        const requestBody = {
            ...upsertCase,
            curator: { email: 'updater@gmail.com' },
@@ -314,11 +318,12 @@
        expect(await CaseRevision.collection.countDocuments()).toEqual(0);
    });
    it('with existing case creates a case revision', async () => {
+        const sourceEntryId = 'case_id';
        const upsertCase = {
            ...minimalCase,
            caseReference: {
                ...minimalCase.caseReference,
-                sourceEntryId: 'case_id',
+                sourceEntryId: hashAndSalt(sourceEntryId),
            },
        };
        const c = new Case({
@@ -333,6 +338,7 @@
        });
        await c.save();

+        upsertCase.caseReference.sourceEntryId = sourceEntryId;
        const requestBody = {
            ...upsertCase,
            curator: { email: 'updater@gmail.com' },
@@ -353,11 +359,12 @@
});

describe('batch upsert', () => {
    it('sets create and update metadata', async () => {
+        const sourceEntryId = 'case_id_exists';
        const existingCase = {
            ...minimalCase,
            caseReference: {
                ...minimalCase.caseReference,
-                sourceEntryId: 'case_id_exists',
+                sourceEntryId: hashAndSalt(sourceEntryId),
            },
        };
        const c = new Case({
@@ -371,6 +378,8 @@
            },
        });
        await c.save();
+
+        existingCase.caseReference.sourceEntryId = sourceEntryId;
        const existingCaseWithUpdate = { ...existingCase, notes: 'new notes' };

        const newCase = {
@@ -449,11 +458,13 @@
        });
    });
    it('with existing cases creates case revisions', async () => {
+        const sourceEntryId = 'case_id_exists';
+        const hashedSourceEntryId = hashAndSalt(sourceEntryId);
        const existingCase = {
            ...minimalCase,
            caseReference: {
                ...minimalCase.caseReference,
-                sourceEntryId: 'case_id_exists',
+                sourceEntryId: hashedSourceEntryId,
            },
        };
        const c = new Case({
@@ -468,6 +479,7 @@
        });
        await c.save();

+        existingCase.caseReference.sourceEntryId = sourceEntryId;
        const newCase = {
            ...minimalCase,
            caseReference: {
@@ -487,6 +499,8 @@
            nextFn,
        );

+        c.caseReference.sourceEntryId = hashedSourceEntryId;
+
        expect(nextFn).toHaveBeenCalledTimes(1);
        expect(await CaseRevision.collection.countDocuments()).toEqual(1);
        expect((await CaseRevision.find())[0].case.toObject()).toEqual(
        );
    });
    it('removes cases from request that would not be updated', async () => {
+        const sourceEntryId1 = 'case_id_exists';
        const existingCase = {
            ...minimalCase,
            caseReference: {
                ...minimalCase.caseReference,
-                sourceEntryId: 'case_id_exists',
+                sourceEntryId: hashAndSalt(sourceEntryId1),
            },
        };
+        const sourceEntryId2 = 'case_id_exists2';
        const existingCase2 = {
            ...minimalCase,
            caseReference: {
                ...minimalCase.caseReference,
-                sourceEntryId: 'case_id_exists2',
+                sourceEntryId: hashAndSalt(sourceEntryId2),
            },
        };
        const newCase = {
@@ -538,6 +554,9 @@
        });
        await c2.save();

+        existingCase.caseReference.sourceEntryId = sourceEntryId1;
+        existingCase2.caseReference.sourceEntryId = sourceEntryId2;
+
        const requestBody = {
            cases: [existingCase, existingCase2, newCase],
            curator: { email: 'updater@gmail.com' },

diff --git a/data-serving/data-service/tsconfig.json b/data-serving/data-service/tsconfig.json
index f13297219..200849556 100644
--- a/data-serving/data-service/tsconfig.json
+++ b/data-serving/data-service/tsconfig.json
@@ -2,7 +2,7 @@
     "extends": "../../common/tsconfig.json",
     "compilerOptions": {
        "outDir": "dist", /* Redirect output structure to the directory. */
-        "types": [ "@types/jest" ]
+        "types": [ "@types/jest" ],
    },
    "include": [
        "src/**/*.ts",

diff --git a/data-serving/scripts/setup-db/migrate-mongo-config.js b/data-serving/scripts/setup-db/migrate-mongo-config.js
index a22bb855c..56336cec6 100644
--- a/data-serving/scripts/setup-db/migrate-mongo-config.js
+++ b/data-serving/scripts/setup-db/migrate-mongo-config.js
@@ -27,7 +27,10 @@ const config = {
 
  // Enable the algorithm to create a checksum of the file contents and use that in the comparison to determine
  // if the file should be run. Requires that scripts are coded to be run multiple times.
-  useFileHash: false
+  useFileHash: false,
+
+  // https://github.com/seppevs/migrate-mongo/issues/411
+  moduleSystem: 'commonjs'
 };
 
 // Return the config as a promise

diff --git a/data-serving/scripts/setup-db/migrations/20220510211614-hash-source-entry-ids.js b/data-serving/scripts/setup-db/migrations/20220510211614-hash-source-entry-ids.js
new file mode 100644
index 000000000..d462392cd
--- /dev/null
+++ b/data-serving/scripts/setup-db/migrations/20220510211614-hash-source-entry-ids.js
@@ -0,0 +1,24 @@
+const { createHash } = require('crypto');
+
+module.exports = {
+    async up(db, client) {
+        const collection = db.collection('cases');
+
+        // This happens one at a time, and will be slow.
+        await collection.find({"caseReference.sourceEntryId": {$ne: null} }).forEach(function (doc) {
+            if (doc.caseReference?.sourceEntryId) {
+                // The salt appended here must match the data service's SALT
+                // env var, or hashAndSalt() will compute different digests.
+                const hashedId = createHash('sha256').update(doc.caseReference.sourceEntryId + (process.env.SALT || 'salt')).digest('hex');
+                collection.updateOne(
+                    { _id: doc._id },
+                    { $set: { "caseReference.sourceEntryId": hashedId } },
+                );
+            }
+        });
+    },
+
+    async down(db, client) {
+        // "Unhashing" is not a thing.
+    }
+};
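
For reference, a minimal sketch of the hashing scheme this patch introduces, assuming SALT is configured to the same value for the data service and for the migration environment. The explicit salt parameter and the 'entry123' input below are illustrative only; the service's hashAndSalt() in data-serving/data-service/src/util/case.ts reads SALT from the validated environment instead:

    import { createHash } from 'crypto';

    // SHA-256 over the raw sourceEntryId with the salt appended, hex-encoded,
    // mirroring hashAndSalt() in src/util/case.ts.
    const hashAndSalt = (rawString: string, salt: string): string =>
        createHash('sha256').update(rawString + salt).digest('hex');

    // 'salt' mirrors the devDefault declared in validate-env.ts.
    const salt = process.env.SALT ?? 'salt';

    // The digest is deterministic: the same ID and salt always hash to the
    // same value, so an upsert after the migration still finds the case.
    console.log(hashAndSalt('entry123', salt) === hashAndSalt('entry123', salt)); // true

Because the scheme is an unkeyed hash with a shared salt (the FIXMEs in util/case.ts acknowledge this), anyone who knows the salt can recompute a sourceEntryId's digest; the migration's down() is a no-op for the same reason.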