Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perform LiftOver for CMH query results (GRCh38 -> h37) #353

Merged
merged 7 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 52 additions & 3 deletions server/src/models/GnomadAnnotationModel.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import mongoose, { Model, model } from 'mongoose';
import logger from '../logger';
import {
CMHVariantIndelCoordinate,
GnomadBaseAnnotation,
GnomadGenomeAnnotation,
GnomadGRCh37ExomeAnnotation,
Expand Down Expand Up @@ -90,13 +91,61 @@ const getAnnotations = async (

if (!coordinates.length) return [];

return await model.aggregate([
{ $match: { pos: { $gte: start, $lte: end } } },
{ $match: { $or: coordinates } },
// Check and modify coordinates to fit gnomAD model
// CMH uses "-" in ref/alt fields for insertions/deletions respectively, gnomAD doesn't
const cmhInsertions: CMHVariantIndelCoordinate<'$alt'>[] = [];
const cmhDeletions: CMHVariantIndelCoordinate<'$ref'>[] = [];
const normalCoords: VariantCoordinate[] = [];
coordinates.forEach(coord => {
if (coord.alt === '-') {
// CMH deletion
cmhDeletions.push({
$expr: {
$eq: [{ $substrCP: ['$ref', 1, { $strLenCP: '$ref' }] }, coord.ref],
},
pos: coord.pos - 1, // cmh 'start' is +1 compared to gnomad
chrom: coord.chrom,
});
} else if (coord.ref === '-') {
// CMH insertion
cmhInsertions.push({
$expr: {
$eq: [{ $substrCP: ['$alt', 1, { $strLenCP: '$alt' }] }, coord.alt],
},
pos: coord.pos,
chrom: coord.chrom,
});
} else {
normalCoords.push(coord);
}
});

// don't worry about the types
const coordinateStage: { $match: { $or: any[] } } = {
$match: {
$or: [
...normalCoords, // normal coordinates
],
},
};

// Don't need to add match for CMH coordinates if there aren't any
if (cmhInsertions.length > 0 || cmhDeletions.length > 0) {
// we assume that ref[0] == alt[0] in gnomAD
coordinateStage.$match.$or = coordinateStage.$match.$or.concat([
...cmhInsertions,
...cmhDeletions,
]);
}

const results = await model.aggregate([
{ $match: { pos: { $gte: Math.max(start - 1, 0), $lte: end } } },
coordinateStage,
{
$project: Object.fromEntries([...omittedFields, '_id', 'assembly', 'type'].map(f => [f, 0])),
},
]);
return results;
};

GnomadGRCh37AnnotationSchema.statics.getAnnotations = async function (ids: AnnotationInput) {
Expand Down
8 changes: 6 additions & 2 deletions server/src/models/utils/getCoordinates.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import resolveAssembly from '../../resolvers/getVariantsResolver/utils/resolveAssembly';
import { VariantCoordinate, VariantQueryDataResult } from '../../types';

const getCoordinates = (result: VariantQueryDataResult[]) => {
const getCoordinates = (results: VariantQueryDataResult[]) => {
let start = +Infinity;
let end = -Infinity;
const coordinates: VariantCoordinate[] = [];
const variants = result.flat().map(d => d.variant);
const variants = results.flat().map(d => d.variant);
variants.forEach(variant => {
const resolvedAssemblyId = resolveAssembly(variant.assemblyIdCurrent ?? 'GRCh37');

Expand All @@ -17,6 +17,10 @@ const getCoordinates = (result: VariantQueryDataResult[]) => {
end = variant.end;
}

if (variant.chromosome.startsWith('chr')) {
variant.chromosome = variant.chromosome.substring(3);
}

coordinates.push({
alt: variant.alt,
chrom: `${resolvedAssemblyId === 'GRCh38' ? 'chr' : ''}${variant.chromosome}`,
Expand Down
98 changes: 56 additions & 42 deletions server/src/resolvers/getVariantsResolver/utils/liftOver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,15 @@ const liftover = timeitAsync('liftover')(
// Convert variants from JSON format to BED format.
// Note that position format is 1-based and BED format is half-open 0-based: https://genome.ucsc.edu/FAQ/FAQformat.html#format1
const bedstring = dataForLiftover
.map(v => `chr${v.variant.chromosome}\t${v.variant.start - 1}\t${v.variant.end}`)
.map(
v =>
`${
// if coming from GRCh38, probably starts with chr already
v.variant.chromosome.startsWith('chr')
? v.variant.chromosome
: 'chr' + v.variant.chromosome
}\t${v.variant.start - 1}\t${v.variant.end}`
)
.join('\n');
const lifted = await createTmpFile();
const unlifted = await createTmpFile();
Expand All @@ -46,52 +54,58 @@ const liftover = timeitAsync('liftover')(
} else {
chain = '/home/node/hg19ToHg38.over.chain';
}
await exec(`liftOver ${bedfile} ${chain} ${lifted} ${unlifted}`);
const _liftedVars = await promises.readFile(lifted);
const _unliftedVars = await promises.readFile(unlifted);
const liftedVars = parseBedStart(_liftedVars.toString());
const unliftedVars = parseBedStart(_unliftedVars.toString());
const liftedVarsEnd = parseBedEnd(_liftedVars.toString());
const liftOverCommand = `liftOver ${bedfile} ${chain} ${lifted} ${unlifted}`;
try {
await exec(liftOverCommand);
const _liftedVars = await promises.readFile(lifted);
const _unliftedVars = await promises.readFile(unlifted);
const liftedVars = parseBedStart(_liftedVars.toString());
const unliftedVars = parseBedStart(_unliftedVars.toString());
const liftedVarsEnd = parseBedEnd(_liftedVars.toString());

const unliftedMap: { [key: string]: boolean } = unliftedVars.reduce(
(acc, curr) => ({ ...acc, [curr]: true }),
{}
);
const unliftedVariants: VariantQueryDataResult[] = [];
const unliftedMap: { [key: string]: boolean } = unliftedVars.reduce(
(acc, curr) => ({ ...acc, [curr]: true }),
{}
);
const unliftedVariants: VariantQueryDataResult[] = [];

// Merge lifted variants with dataForAnnotation. Filter unmapped variants.
dataForLiftover.forEach((v, i) => {
if (unliftedMap[(v.variant.start - 1).toString()]) {
v.variant.assemblyIdCurrent = v.variant.assemblyId;
unliftedVariants.push(v);
} else {
v.variant.start = Number(liftedVars[i]) + 1; // Convert from BED format to position format.
v.variant.end = Number(liftedVarsEnd[i]);
v.variant.assemblyIdCurrent = assemblyIdInput;
dataForAnnotation.push(v);
}
});
// Merge lifted variants with dataForAnnotation. Filter unmapped variants.
dataForLiftover.forEach((v, i) => {
if (unliftedMap[(v.variant.start - 1).toString()]) {
v.variant.assemblyIdCurrent = v.variant.assemblyId;
unliftedVariants.push(v);
} else {
v.variant.start = Number(liftedVars[i]) + 1; // Convert from BED format to position format.
v.variant.end = Number(liftedVarsEnd[i]);
v.variant.assemblyIdCurrent = assemblyIdInput;
dataForAnnotation.push(v);
}
});

// Compute the annotation position for the variants that are in user's requested assembly.
let geneStart = Infinity;
let geneEnd = 0;
dataForAnnotation.forEach(result => {
if (result.variant.start < geneStart) {
geneStart = result.variant.start;
}
if (result.variant.end > geneEnd) {
geneEnd = result.variant.end;
}
});
// Compute the annotation position for the variants that are in user's requested assembly.
let geneStart = Infinity;
let geneEnd = 0;
dataForAnnotation.forEach(result => {
if (result.variant.start < geneStart) {
geneStart = result.variant.start;
}
if (result.variant.end > geneEnd) {
geneEnd = result.variant.end;
}
});

let annotationPosition = '';
if (dataForAnnotation.length > 0)
annotationPosition = `${dataForAnnotation[0].variant.chromosome}:${geneStart}-${geneEnd}`;
promises.rm(lifted);
promises.rm(unlifted);
promises.rm(bedfile);
let annotationPosition = '';
if (dataForAnnotation.length > 0)
annotationPosition = `${dataForAnnotation[0].variant.chromosome}:${geneStart}-${geneEnd}`;
promises.rm(lifted);
promises.rm(unlifted);
promises.rm(bedfile);

return { dataForAnnotation, unliftedVariants, annotationPosition };
return { dataForAnnotation, unliftedVariants, annotationPosition };
} catch (e) {
console.error(e);
return { dataForAnnotation, unliftedVariants: [], annotationPosition: '' };
}
}
);

Expand Down
11 changes: 11 additions & 0 deletions server/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,17 @@ export interface VariantCoordinate {
pos: number;
}

// Very specific type so that it can be put into a mongo.aggregate under "$or:"
// CMH indels use "-" unlike G4RD, so we compare $ref[1:] or $alt[1:] to ref or alt in CMH respectively
// '$ref' for deletions, '$alt' for insertions (ie. which one has more nucleotides)
export interface CMHVariantIndelCoordinate<T extends '$ref' | '$alt'> {
$expr: {
$eq: [{ $substrCP: [T, 1, { $strLenCP: T }] }, string];
};
pos: number;
chrom: string;
}

export interface CaddAnnotation extends VariantCoordinate {
aaAlt: string;
aaPos: string;
Expand Down