diff --git a/svanna-cli/src/test/resources/multigene-inversion.vcf b/svanna-cli/src/test/resources/multigene-inversion.vcf new file mode 100644 index 00000000..17786a03 --- /dev/null +++ b/svanna-cli/src/test/resources/multigene-inversion.vcf @@ -0,0 +1,37 @@ +##fileformat=VCFv4.2 +##fileDate=2021-01-01T00:00:00.000Z +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample +chr12 25803195 Something N 1000 PASS SVTYPE=INV;END=125316602;SVLEN=0 GT:DP:AD 0/1:10:5,5 \ No newline at end of file diff --git a/svanna-core/src/main/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculator.java b/svanna-core/src/main/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculator.java index 50fe6faa..2ee3e7ae 100644 --- a/svanna-core/src/main/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculator.java +++ b/svanna-core/src/main/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculator.java @@ -117,7 +117,7 @@ private static List mapToPaddedExons(Transcript tx, int cdsStart, in @Override public double projectImpact(Projection projection) { List transcripts = projection.source().transcriptStream() - .collect(Collectors.toUnmodifiableList()); + .collect(Collectors.toList()); double promoterImpact = checkPromoter(projection.route().segments(), transcripts); double geneImpact = projection.isIntraSegment() @@ -134,15 +134,15 @@ public double noImpact() { return geneFactor; } - private double checkPromoter(List segments, Collection transcripts) { - Set nonGapSegments = segments.stream() - .filter(s -> s.event() != Event.GAP) - .collect(Collectors.toSet()); + private double checkPromoter(Collection segments, Iterable transcripts) { + Iterable nonGapSegments = segments.stream() + .filter(s -> !Event.GAP.equals(s.event())) + .collect(Collectors.toList()); double score = Double.NaN; for (Transcript tx : transcripts) { int txStart = tx.start(); - int promoterStart = txStart - promoterLength; + int promoterStart = Math.max(txStart - promoterLength, 0); // Let's not allow negative start coordinate. int promoterEnd = txStart + Coordinates.endDelta(tx.coordinateSystem()); for (Segment nonGapSegment : nonGapSegments) { @@ -150,13 +150,27 @@ private double checkPromoter(List segments, Collection tran int segmentEnd = nonGapSegment.endOnStrand(tx.strand()); if (Coordinates.overlap(tx.coordinateSystem(), promoterStart, promoterEnd, nonGapSegment.coordinateSystem(), segmentStart, segmentEnd)) { - double fitness = fitnessWithEvent.get(nonGapSegment.event()); + double fitness; + if (Event.INVERSION.equals(nonGapSegment.event())) { + if (Coordinates.aContainsB(CS, segmentStart, segmentEnd, CS, promoterStart, promoterEnd)) + // Inversion of an entire promoter is not considered deleterious. + fitness = 1; + else + // The promoter is disrupted by the inversion boundary, hence deleterious. + fitness = fitnessWithEvent.get(Event.INVERSION); + } else { + // All other events are scored using default scoring scheme. + fitness = fitnessWithEvent.get(nonGapSegment.event()); + } + // In theory, `fitness + geneFactor * promoterFitnessGain` can be above 1, which does not make sense. // Let's not allow that to happen. - double bla = Math.min(fitness + geneFactor * promoterFitnessGain, 1.); - fitness = Math.min(bla, geneFactor); - if (Double.isNaN(score) || fitness < score) - score = fitness; + double maxAllowedFitness = Math.min(fitness + geneFactor * promoterFitnessGain, 1.); + fitness = Math.min(maxAllowedFitness, geneFactor); + + score = Double.isNaN(score) + ? fitness + : Math.min(fitness, score); } } }