Skip to content

Commit

Permalink
Hashing sourceEntryIds
Browse files Browse the repository at this point in the history
  • Loading branch information
jim-sheldon committed May 13, 2022
1 parent 997e260 commit 378a824
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 47 deletions.
50 changes: 30 additions & 20 deletions data-serving/data-service/src/controllers/case.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { AgeBucket } from '../model/age-bucket';
import {
Case,
caseAgeRange,
Expand Down Expand Up @@ -26,12 +27,12 @@ import {
denormalizeEventsHeaders,
parseDownloadedCase,
removeBlankHeader,
hashAndSalt,
} from '../util/case';

import { logger } from '../util/logger';

import stringify from 'csv-stringify/lib/sync';
import _ from 'lodash';
import { AgeBucket } from '../model/age-bucket';

class GeocodeNotFoundError extends Error {}

Expand All @@ -41,6 +42,13 @@ type BatchValidationErrors = { index: number; message: string }[];

const caseFromDTO = async (receivedCase: CaseDTO) => {
const aCase = (receivedCase as unknown) as LeanDocument<CaseDocument>;
if (
receivedCase.caseReference?.sourceId &&
receivedCase.caseReference?.sourceEntryId
) {
const hashedId = hashAndSalt(receivedCase.caseReference.sourceEntryId);
aCase.caseReference.sourceEntryId = hashedId;
}
if (receivedCase.demographics?.ageRange) {
// won't be many age buckets, so fetch all of them.
const allBuckets = await AgeBucket.find({});
Expand Down Expand Up @@ -295,14 +303,14 @@ export class CasesController {
* Handles HTTP GET /api/cases.
*/
list = async (req: Request, res: Response): Promise<void> => {
logger.info('List method entrypoint');
logger.debug('List method entrypoint');
const page = Number(req.query.page) || 1;
const limit = Number(req.query.limit) || 10;
const countLimit = Number(req.query.count_limit) || 10000;
const sortBy = String(req.query.sort_by) || 'default';
const sortByOrder = String(req.query.order) || 'ascending';

logger.info('Got query params');
logger.debug('Got query params');

if (page < 1) {
res.status(422).json({ message: 'page must be > 0' });
Expand All @@ -321,7 +329,7 @@ export class CasesController {
return;
}

logger.info('Got past 422s');
logger.debug('Got past 422s');
try {
const casesQuery = casesMatchingSearchQuery({
searchQuery: req.query.q || '',
Expand All @@ -348,7 +356,7 @@ export class CasesController {
]);

const dtos = await Promise.all(docs.map(dtoFromCase));
logger.info('got results');
logger.debug('got results');
// total is actually stored in a count index in mongo, so the query is fast.
// however to maintain existing behaviour, only return the count limit
const reportedTotal = Math.min(total, countLimit);
Expand All @@ -360,11 +368,11 @@ export class CasesController {
nextPage: page + 1,
total: reportedTotal,
});
logger.info('Got multiple pages of results');
logger.debug('Got multiple pages of results');
return;
}
// If we fetched all available data, just return it.
logger.info('Got one page of results');
logger.debug('Got one page of results');
res.json({ cases: dtos, total: reportedTotal });
} catch (e) {
if (e instanceof ParsingError) {
Expand Down Expand Up @@ -532,10 +540,8 @@ export class CasesController {
batchUpsert = async (req: Request, res: Response): Promise<void> => {
try {
// Batch validate cases first.
logger.info('batchUpsert: entrypoint');
const cases = req.body.cases;
const errors = await this.batchValidate(cases);
logger.info('batchUpsert: validated cases');
if (errors.length > 0) {
// drop any invalid cases but don't give up yet: upsert the remainder
const badCases = _.orderBy(errors, 'index', 'desc').map(
Expand All @@ -548,12 +554,10 @@ export class CasesController {
`batchUpsert: dropped ${errors.length} invalid cases`,
);
}
logger.info('batchUpsert: splitting cases by sourceID');
const {
unrestrictedCases,
restrictedCases,
} = this.filterCasesBySourceRestricted(cases);
logger.info('batchUpsert: preparing bulk write');
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const upsertLambda = async (c: any) => {
delete c.caseCount;
Expand Down Expand Up @@ -592,7 +596,6 @@ export class CasesController {
await Promise.all(restrictedCases.map(upsertLambda)),
{ ordered: false },
);
logger.info('batchUpsert: finished bulk write');
const status = errors.length > 0 ? 207 : 200;
res.status(status).json({
phase: 'UPSERT',
Expand All @@ -610,11 +613,11 @@ export class CasesController {
} catch (e) {
const err = e as Error;
if (err.name === 'ValidationError') {
logger.error(err);
logger.error(`Validation error in batch upsert: ${err}`);
res.status(422).json(err);
return;
}
logger.error(err);
logger.error(`Other error in batch upsert: ${err}`);
res.status(500).json(err);
return;
}
Expand Down Expand Up @@ -646,6 +649,7 @@ export class CasesController {
res.status(422).json(err);
return;
}
logger.error(`Error in update: ${err.message}`);
res.status(500).json(err);
return;
}
Expand Down Expand Up @@ -725,17 +729,21 @@ export class CasesController {
* Handles HTTP PUT /api/cases.
*/
upsert = async (req: Request, res: Response): Promise<void> => {
logger.info('Upsert entry');
try {
let hashedId = '';
if (req.body.caseReference?.sourceId &&
req.body.caseReference?.sourceEntryId) {
hashedId = hashAndSalt(req.body.caseReference.sourceEntryId);
}
let c = await Case.findOne({
'caseReference.sourceId': req.body.caseReference?.sourceId,
'caseReference.sourceEntryId':
req.body.caseReference?.sourceEntryId,
'caseReference.sourceEntryId': hashedId,
});
if (!c) {
c = await RestrictedCase.findOne({
'caseReference.sourceId': req.body.caseReference?.sourceId,
'caseReference.sourceEntryId':
req.body.caseReference?.sourceEntryId,
'caseReference.sourceEntryId': hashedId,
});
}
if (
Expand Down Expand Up @@ -766,9 +774,11 @@ export class CasesController {
err.name === 'ValidationError' ||
err instanceof InvalidParamError
) {
logger.error(`ValidationError in upsert: ${err.message}`);
res.status(422).json(err.message);
return;
}
logger.error(`Internal server error in upsert: ${err.message}`);
res.status(500).json(err.message);
return;
}
Expand Down Expand Up @@ -1213,7 +1223,7 @@ export const findCasesWithCaseReferenceData = async (
.map((c: any) => {
return {
'caseReference.sourceId': c.caseReference.sourceId,
'caseReference.sourceEntryId': c.caseReference.sourceEntryId,
'caseReference.sourceEntryId': hashAndSalt(c.caseReference.sourceEntryId),
};
});

Expand Down
19 changes: 13 additions & 6 deletions data-serving/data-service/src/controllers/preprocessor.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import { Case, CaseDocument } from '../model/case';
import { NextFunction, Request, Response } from 'express';
import { CaseRevision } from '../model/case-revision';
import {
casesMatchingSearchQuery,
findCasesWithCaseReferenceData,
} from './case';
import { hashAndSalt } from '../util/case';

import { CaseRevision } from '../model/case-revision';
import { NextFunction, Request, Response } from 'express';
import { DocumentQuery } from 'mongoose';
import _ from 'lodash';
import { nextTick } from 'process';
Expand Down Expand Up @@ -58,9 +59,11 @@ export const getCase = async (
// Upsert.
// TODO: Upserts should only generate update metadata if there is a
// diff with what's already in the database.

const hashedId = hashAndSalt(caseReference.sourceEntryId);
return Case.findOne({
'caseReference.sourceId': caseReference.sourceId,
'caseReference.sourceEntryId': caseReference.sourceEntryId,
'caseReference.sourceEntryId': hashedId,
});
}

Expand Down Expand Up @@ -109,7 +112,7 @@ export const batchUpsertDropUnchangedCases = async (
const c = request.body.cases[i];
if (c.caseReference?.sourceId && c.caseReference?.sourceEntryId) {
const existingCase = existingCasesByCaseRefCombo.get(
c.caseReference.sourceId + ':' + c.caseReference.sourceEntryId,
c.caseReference.sourceId + ':' + hashAndSalt(c.caseReference.sourceEntryId),
);
if (existingCase !== undefined && existingCase.equalsJSON(c)) {
request.body.cases.splice(i, 1);
Expand Down Expand Up @@ -151,11 +154,15 @@ export const setBatchUpsertFields = async (
request.body.cases.forEach((c: any) => {
// Set the request cases' revision metadata to the update metadata, if
// present, or create metadata otherwise.
let hashedId = undefined;
if (c.caseReference?.sourceEntryId) {
hashedId = hashAndSalt(c.caseReference.sourceEntryId);
}
c.revisionMetadata =
metadataMap.get(
c.caseReference?.sourceId +
':' +
c.caseReference?.sourceEntryId,
hashedId,
) || createNewMetadata(curatorEmail);

// If case is present, add uploadIds to existing list of uploadIds
Expand All @@ -165,7 +172,7 @@ export const setBatchUpsertFields = async (
c.caseReference?.sourceEntryId
) {
const existingCaseUploadIds = existingCasesByCaseRefCombo.get(
c.caseReference.sourceId + ':' + c.caseReference.sourceEntryId,
c.caseReference.sourceId + ':' + hashAndSalt(c.caseReference.sourceEntryId),
)?.caseReference?.uploadIds;
if (existingCaseUploadIds) {
c.caseReference.uploadIds = _.union(
Expand Down
14 changes: 14 additions & 0 deletions data-serving/data-service/src/util/case.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import { TravelHistoryDocument } from '../model/travel-history';
import { VaccineDocument } from '../model/vaccine';
import { VariantDocument } from '../model/variant';

import validateEnv from './validate-env';

import { createHash } from 'crypto';
import _ from 'lodash';

const validEvents = [
Expand All @@ -29,6 +32,8 @@ const dateOnlyEvents = [
'selfIsolation',
];

const env = validateEnv();

/**
* Converts event list to object to make a column for every event in csv file.
*
Expand Down Expand Up @@ -92,6 +97,15 @@ export const parseDownloadedCase = (caseDocument: CaseDTO) => {
};
};

/**
 * Hashes and salts a string. Used for sourceEntryIds.
 */
export const hashAndSalt = (rawString: string) => {
    // FIXME: hash secret
    // FIXME: salt
    const salted = `${rawString}${env.SALT}`;
    const digest = createHash('sha256');
    digest.update(salted);
    return digest.digest('hex');
}

/**
* Enum with possible sortBy keywords
*/
Expand Down
5 changes: 5 additions & 0 deletions data-serving/data-service/src/util/validate-env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export default function validateEnv(): Readonly<{
DB_CONNECTION_STRING: string;
PORT: number;
SERVICE_ENV: string;
SALT: string;
}> & {
readonly [varName: string]: string | boolean | number | undefined;
// eslint-disable-next-line indent
Expand All @@ -24,5 +25,9 @@ export default function validateEnv(): Readonly<{
desc: 'Environment in which the service is running',
devDefault: 'local',
}),
SALT: str({
desc: 'Additional string to append before hashing',
devDefault: 'salt',
}),
});
}
Loading

0 comments on commit 378a824

Please sign in to comment.