diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java index 2316dd040..9cb9b0d53 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java @@ -26,6 +26,7 @@ abstract class AbstractPrioritizeCommand extends BaseLiricalCommand { private static final Logger LOGGER = LoggerFactory.getLogger(AbstractPrioritizeCommand.class); + private static final String UNKNOWN_VERSION_PLACEHOLDER = "UNKNOWN VERSION"; // ---------------------------------------------- OUTPUTS ---------------------------------------------------------- @CommandLine.ArgGroup(validate = false, heading = "Output options:%n") @@ -75,19 +76,28 @@ public Integer call() throws Exception { if (!errors.isEmpty()) throw new LiricalException(String.format("Errors: %s", String.join(", ", errors))); + GenomeBuild genomeBuild = parseGenomeBuild(getGenomeBuild()); + LOGGER.debug("Using genome build {}", genomeBuild); + + LOGGER.debug("Using {} transcripts", runConfiguration.transcriptDb); + TranscriptDatabase transcriptDb = runConfiguration.transcriptDb; + // 1 - bootstrap the app - Lirical lirical = bootstrapLirical(); + Lirical lirical = bootstrapLirical(genomeBuild); + LOGGER.info("Configured LIRICAL {}", lirical.version() + .map("v%s"::formatted) + .orElse(UNKNOWN_VERSION_PLACEHOLDER)); // 2 - prepare inputs LOGGER.info("Preparing the analysis data"); - AnalysisData analysisData = prepareAnalysisData(lirical); + AnalysisData analysisData = prepareAnalysisData(lirical, genomeBuild, transcriptDb); if (analysisData.presentPhenotypeTerms().isEmpty() && analysisData.negatedPhenotypeTerms().isEmpty()) { LOGGER.warn("No phenotype terms were provided. 
Aborting.."); return 1; } // 3 - run the analysis - AnalysisOptions analysisOptions = prepareAnalysisOptions(lirical); + AnalysisOptions analysisOptions = prepareAnalysisOptions(lirical, genomeBuild, transcriptDb); LOGGER.info("Starting the analysis"); LiricalAnalysisRunner analysisRunner = lirical.analysisRunner(); AnalysisResults results = analysisRunner.run(analysisData, analysisOptions); @@ -96,9 +106,9 @@ public Integer call() throws Exception { LOGGER.info("Writing out the results"); FilteringStats filteringStats = analysisData.genes().computeFilteringStats(); AnalysisResultsMetadata metadata = AnalysisResultsMetadata.builder() - .setLiricalVersion(LIRICAL_VERSION) - .setHpoVersion(lirical.phenotypeService().hpo().getMetaInfo().getOrDefault("release", "UNKNOWN RELEASE")) - .setTranscriptDatabase(runConfiguration.transcriptDb.toString()) + .setLiricalVersion(lirical.version().orElse(UNKNOWN_VERSION_PLACEHOLDER)) + .setHpoVersion(lirical.phenotypeService().hpo().version().orElse(UNKNOWN_VERSION_PLACEHOLDER)) + .setTranscriptDatabase(transcriptDb.toString()) .setLiricalPath(dataSection.liricalDataDirectory.toAbsolutePath().toString()) .setExomiserPath(dataSection.exomiserDatabase == null ? "" : dataSection.exomiserDatabase.toAbsolutePath().toString()) .setAnalysisDate(LocalDateTime.now().toString()) @@ -141,7 +151,7 @@ protected List checkInput() { return errors; } - protected abstract AnalysisData prepareAnalysisData(Lirical lirical) throws LiricalParseException; + protected abstract AnalysisData prepareAnalysisData(Lirical lirical, GenomeBuild genomeBuild, TranscriptDatabase transcriptDb) throws LiricalParseException; protected OutputOptions createOutputOptions() { LrThreshold lrThreshold = output.lrThreshold == null ? 
LrThreshold.notInitialized() : LrThreshold.setToUserDefinedThreshold(output.lrThreshold); diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AnalysisDataParserAwareCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AnalysisDataParserAwareCommand.java index 097989f71..54bf99d90 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AnalysisDataParserAwareCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AnalysisDataParserAwareCommand.java @@ -3,15 +3,17 @@ import org.monarchinitiative.lirical.core.Lirical; import org.monarchinitiative.lirical.core.analysis.AnalysisData; import org.monarchinitiative.lirical.core.analysis.LiricalParseException; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; import org.monarchinitiative.lirical.io.analysis.AnalysisDataParserFactory; abstract class AnalysisDataParserAwareCommand extends AbstractPrioritizeCommand { @Override - protected AnalysisData prepareAnalysisData(Lirical lirical) throws LiricalParseException { + protected AnalysisData prepareAnalysisData(Lirical lirical, GenomeBuild genomeBuild, TranscriptDatabase transcriptDb) throws LiricalParseException { HpoTermSanitizer sanitizer = new HpoTermSanitizer(lirical.phenotypeService().hpo()); - AnalysisDataParserFactory parserFactory = new AnalysisDataParserFactory(sanitizer, lirical.variantParserFactory().orElse(null), lirical.phenotypeService().associationData()); + AnalysisDataParserFactory parserFactory = new AnalysisDataParserFactory(sanitizer, lirical.variantParserFactory(), lirical.phenotypeService().associationData()); return prepareAnalysisData(parserFactory); } diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BaseLiricalCommand.java 
b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BaseLiricalCommand.java index 99e9fdc17..09b56c6e2 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BaseLiricalCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BaseLiricalCommand.java @@ -1,6 +1,5 @@ package org.monarchinitiative.lirical.cli.cmd; -import org.monarchinitiative.lirical.configuration.GenotypeLrProperties; import org.monarchinitiative.lirical.configuration.LiricalBuilder; import org.monarchinitiative.lirical.core.Lirical; import org.monarchinitiative.lirical.core.analysis.AnalysisOptions; @@ -11,8 +10,8 @@ import org.monarchinitiative.lirical.core.io.VariantParser; import org.monarchinitiative.lirical.core.io.VariantParserFactory; import org.monarchinitiative.lirical.core.model.*; -import org.monarchinitiative.lirical.core.service.TranscriptDatabase; import org.monarchinitiative.lirical.io.LiricalDataException; +import org.monarchinitiative.lirical.io.background.CustomBackgroundVariantFrequencyServiceFactory; import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; import org.slf4j.Logger; @@ -23,6 +22,7 @@ import java.nio.file.Path; import java.util.*; import java.util.concurrent.Callable; +import java.util.stream.Collectors; /** * Base class that describes data and configuration sections of the CLI, and contains common functionalities. 
@@ -30,8 +30,6 @@ abstract class BaseLiricalCommand implements Callable { private static final Logger LOGGER = LoggerFactory.getLogger(BaseLiricalCommand.class); - private static final Properties PROPERTIES = readProperties(); - protected static final String LIRICAL_VERSION = PROPERTIES.getProperty("lirical.version", "unknown version"); private static String readBanner() { try (InputStream is = new BufferedInputStream(Objects.requireNonNull(BaseLiricalCommand.class.getResourceAsStream("/banner.txt")))) { @@ -52,10 +50,18 @@ public static class DataSection { description = "Path to Lirical data directory.") public Path liricalDataDirectory; - @CommandLine.Option(names = {"-e", "--exomiser"}, + @CommandLine.Option(names = {"-e", "--exomiser"}, description = "Path to the Exomiser variant database.") public Path exomiserDatabase = null; + @CommandLine.Option(names = {"-e19", "--exomiser-hg19"}, + description = "Path to the Exomiser variant database for hg19.") + public Path exomiserHg19Database = null; + + @CommandLine.Option(names = {"-e38", "--exomiser-hg38"}, + description = "Path to the Exomiser variant database for hg38.") + public Path exomiserHg38Database = null; + @CommandLine.Option(names = {"-b", "--background"}, description = "Path to non-default background frequency file.") public Path backgroundFrequencyFile = null; @@ -92,35 +98,27 @@ public static class RunConfiguration { @CommandLine.Option(names = {"--strict"}, description = "Use strict penalties if the genotype does not match the disease model in terms " + "of number of called pathogenic alleles. (default: ${DEFAULT-VALUE}).") - public boolean strict = false; + public boolean useStrictPenalties = false; - /* Default frequency of called-pathogenic variants in the general population (gnomAD). In the vast majority of - * cases, we can derive this information from gnomAD. This constant is used if for whatever reason, - * data was not available. 
- */ @CommandLine.Option(names = {"--variant-background-frequency"}, - // TODO - add better description - description = "Default background frequency of variants in a gene (default: ${DEFAULT-VALUE}).") + description = { + "Default frequency of called-pathogenic variants in the general population (gnomAD).", + "In the vast majority of cases, we can derive this information from gnomAD.", + "This constant is used if for whatever reason, data was not available.", + "(default: ${DEFAULT-VALUE})."}) public double defaultVariantBackgroundFrequency = 0.1; @CommandLine.Option(names = {"--pathogenicity-threshold"}, description = "Variant with greater pathogenicity score is considered deleterious (default: ${DEFAULT-VALUE}).") public float pathogenicityThreshold = .8f; + @Deprecated(forRemoval = true, since = "2.0.0-RC2") @CommandLine.Option(names = {"--default-allele-frequency"}, - description = "Variant with greater allele frequency in at least one population is considered common (default: ${DEFAULT-VALUE}).") - public float defaultAlleleFrequency = 1E-5f; - } - - private static Properties readProperties() { - Properties properties = new Properties(); - - try (InputStream is = BaseLiricalCommand.class.getResourceAsStream("/lirical.properties")) { - properties.load(is); - } catch (IOException e) { - LOGGER.warn("Error loading properties: {}", e.getMessage()); - } - return properties; + description = { + "Variant with greater allele frequency in at least one population is considered common.", + "NOTE: the option has been DEPRECATED" + }) + public float defaultAlleleFrequency = Float.NaN; } protected static void printBanner() { @@ -135,58 +133,146 @@ protected List checkInput() { LOGGER.error(msg); errors.add(msg); } + + // Obsolete options must/should not be used + if (dataSection.exomiserDatabase != null) { + // Check the obsolete `-e | --exomiser` option is not being used. + String msg = "`-e | --exomiser` option has been deprecated. 
Use `-e19 or -e38` to set paths to Exomiser variant databases for hg19 and hg38, respectively"; + LOGGER.error(msg); + errors.add(msg); + } + + if (!Float.isNaN(runConfiguration.defaultAlleleFrequency)) { + String msg = "`--default-allele-frequency` option has been deprecated."; + LOGGER.error(msg); + } + + Optional genomeBuild = GenomeBuild.parse(getGenomeBuild()); + if (genomeBuild.isEmpty()) { + // We must have genome build! + String msg = "Genome build must be set"; + LOGGER.error(msg); + errors.add(msg); + } else { + // Check Exomiser db seem to match the genome build. + switch (genomeBuild.get()) { + case HG19 -> { + if (dataSection.exomiserHg19Database == null && dataSection.exomiserHg38Database != null) { + String msg = "Genome build set to %s but Exomiser variant database is set for %s: %s".formatted(GenomeBuild.HG19, GenomeBuild.HG38, dataSection.exomiserHg38Database.toAbsolutePath()); + LOGGER.error(msg); + errors.add(msg); + } + } + case HG38 -> { + if (dataSection.exomiserHg38Database == null && dataSection.exomiserHg19Database != null) { + String msg = "Genome build set to %s but Exomiser variant database is set for %s: %s".formatted(GenomeBuild.HG38, GenomeBuild.HG19, dataSection.exomiserHg19Database.toAbsolutePath()); + LOGGER.error(msg); + errors.add(msg); + } + } + } + } + return errors; } /** - * Build {@link Lirical} based on {@link DataSection} and {@link RunConfiguration} sections. + * Build {@link Lirical} for a {@link GenomeBuild} based on {@link DataSection} and {@link RunConfiguration} sections. 
*/ - protected Lirical bootstrapLirical() throws LiricalDataException { - LOGGER.info("Spooling up Lirical v{}", LIRICAL_VERSION); - GenomeBuild genomeBuild = parseGenomeBuild(getGenomeBuild()); - - GenotypeLrProperties genotypeLrProperties = new GenotypeLrProperties(runConfiguration.pathogenicityThreshold, runConfiguration.defaultVariantBackgroundFrequency, runConfiguration.strict); - return LiricalBuilder.builder(dataSection.liricalDataDirectory) - .exomiserVariantDatabase(dataSection.exomiserDatabase) - .genomeBuild(genomeBuild) - .backgroundVariantFrequency(dataSection.backgroundFrequencyFile) - .setDiseaseDatabases(runConfiguration.useOrphanet - ? DiseaseDatabase.allKnownDiseaseDatabases() - : Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.DECIPHER)) - .genotypeLrProperties(genotypeLrProperties) - .transcriptDatabase(runConfiguration.transcriptDb) - .defaultVariantAlleleFrequency(runConfiguration.defaultAlleleFrequency) - .build(); + protected Lirical bootstrapLirical(GenomeBuild genomeBuild) throws LiricalDataException { + LiricalBuilder builder = LiricalBuilder.builder(dataSection.liricalDataDirectory); + + switch (genomeBuild) { + case HG19 -> { + if (dataSection.exomiserHg19Database != null) + builder.exomiserVariantDbPath(GenomeBuild.HG19, dataSection.exomiserHg19Database); + } + case HG38 -> { + if (dataSection.exomiserHg38Database != null) + builder.exomiserVariantDbPath(GenomeBuild.HG38, dataSection.exomiserHg38Database); + } + } + + if (dataSection.backgroundFrequencyFile != null) { + LOGGER.debug("Using custom deleterious variant background frequency file at {} for {}", + dataSection.backgroundFrequencyFile.toAbsolutePath(), + genomeBuild); + Map backgroundFrequencies = Map.of(genomeBuild, dataSection.backgroundFrequencyFile); + CustomBackgroundVariantFrequencyServiceFactory backgroundFreqFactory = CustomBackgroundVariantFrequencyServiceFactory.of(backgroundFrequencies); + builder.backgroundVariantFrequencyServiceFactory(backgroundFreqFactory); + } + + 
return builder.build(); } protected abstract String getGenomeBuild(); - private GenomeBuild parseGenomeBuild(String genomeBuild) throws LiricalDataException { + protected GenomeBuild parseGenomeBuild(String genomeBuild) throws LiricalDataException { Optional genomeBuildOptional = GenomeBuild.parse(genomeBuild); if (genomeBuildOptional.isEmpty()) throw new LiricalDataException("Unknown genome build: '" + genomeBuild + "'"); return genomeBuildOptional.get(); } - protected AnalysisOptions prepareAnalysisOptions(Lirical lirical) { + protected AnalysisOptions prepareAnalysisOptions(Lirical lirical, GenomeBuild genomeBuild, TranscriptDatabase transcriptDb) { + AnalysisOptions.Builder builder = AnalysisOptions.builder(); + + // Genome build + builder.genomeBuild(genomeBuild); + + // Tx databases + builder.transcriptDatabase(transcriptDb); + + // Disease databases + Set diseaseDatabases = runConfiguration.useOrphanet + ? DiseaseDatabase.allKnownDiseaseDatabases() + : Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.DECIPHER); + String usedDatabasesSummary = diseaseDatabases.stream().map(DiseaseDatabase::name).collect(Collectors.joining(", ", "[", "]")); + LOGGER.debug("Using disease databases {}", usedDatabasesSummary); + builder.setDiseaseDatabases(diseaseDatabases); + + // The rest.. + LOGGER.debug("Variants with pathogenicity score >{} are considered deleterious", runConfiguration.pathogenicityThreshold); + builder.variantDeleteriousnessThreshold(runConfiguration.pathogenicityThreshold); + + LOGGER.debug("Variant background frequency is set to {}", runConfiguration.defaultVariantBackgroundFrequency); + builder.defaultVariantBackgroundFrequency(runConfiguration.defaultVariantBackgroundFrequency); + + LOGGER.debug("Using strict penalties if the genotype does not match the disease model " + + "in terms of number of called pathogenic alleles? 
{}", runConfiguration.useStrictPenalties); + builder.useStrictPenalties(runConfiguration.useStrictPenalties); + + LOGGER.debug("Running in global mode? {}", runConfiguration.globalAnalysisMode); + builder.useGlobal(runConfiguration.globalAnalysisMode); + LOGGER.debug("Using uniform pretest disease probabilities."); PretestDiseaseProbability pretestDiseaseProbability = PretestDiseaseProbabilities.uniform(lirical.phenotypeService().diseases()); - return AnalysisOptions.of(runConfiguration.globalAnalysisMode, - pretestDiseaseProbability, - runConfiguration.disregardDiseaseWithNoDeleteriousVariants, - runConfiguration.pathogenicityThreshold); + builder.pretestProbability(pretestDiseaseProbability); + + LOGGER.debug("Disregarding diseases with no deleterious variants? {}", runConfiguration.disregardDiseaseWithNoDeleteriousVariants); + builder.disregardDiseaseWithNoDeleteriousVariants(runConfiguration.disregardDiseaseWithNoDeleteriousVariants); + + return builder.build(); } protected static GenesAndGenotypes readVariantsFromVcfFile(String sampleId, Path vcfPath, + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDatabase, VariantParserFactory parserFactory) throws LiricalParseException { if (parserFactory == null) { LOGGER.warn("Cannot process the provided VCF file {}, resources are not set.", vcfPath.toAbsolutePath()); return GenesAndGenotypes.empty(); } + Optional parser = parserFactory.forPath(vcfPath, genomeBuild, transcriptDatabase); + if (parser.isEmpty()) { + LOGGER.warn("Cannot obtain parser for processing the VCF file {} with {} {} due to missing resources", + vcfPath.toAbsolutePath(), genomeBuild, transcriptDatabase); + return GenesAndGenotypes.empty(); + } List variants; - try (VariantParser variantParser = parserFactory.forPath(vcfPath)) { + try (VariantParser variantParser = parser.get()) { // Ensure the VCF file contains the sample if (!variantParser.sampleNames().contains(sampleId)) throw new LiricalParseException("The sample " + sampleId + " is not 
present in VCF at '" + vcfPath.toAbsolutePath() + '\''); diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java index 81732481d..8073b48f8 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java @@ -76,13 +76,15 @@ public Integer call() throws Exception { List errors = checkInput(); if (!errors.isEmpty()) throw new LiricalException(String.format("Errors: %s", String.join(", ", errors))); + GenomeBuild genomeBuild = parseGenomeBuild(getGenomeBuild()); + TranscriptDatabase transcriptDb = runConfiguration.transcriptDb; // 1 - bootstrap LIRICAL. - Lirical lirical = bootstrapLirical(); + Lirical lirical = bootstrapLirical(genomeBuild); // 2 - prepare the simulation data shared by all phenopackets. - AnalysisOptions analysisOptions = prepareAnalysisOptions(lirical); - List backgroundVariants = readBackgroundVariants(lirical); + AnalysisOptions analysisOptions = prepareAnalysisOptions(lirical, genomeBuild, transcriptDb); + List backgroundVariants = readBackgroundVariants(lirical, genomeBuild, transcriptDb); try (BufferedWriter writer = openWriter(outputPath); CSVPrinter printer = CSVFormat.DEFAULT.print(writer)) { @@ -131,17 +133,22 @@ protected String getGenomeBuild() { return genomeBuild; } - private List readBackgroundVariants(Lirical lirical) throws LiricalParseException { + private List readBackgroundVariants(Lirical lirical, + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDatabase) throws LiricalParseException { if (vcfPath == null) { LOGGER.info("Path to VCF file was not provided."); return List.of(); } - if (lirical.variantParserFactory().isEmpty()) { - LOGGER.warn("Cannot process the provided VCF file {}, resources are not set.", vcfPath.toAbsolutePath()); + + Optional parser = 
lirical.variantParserFactory().forPath(vcfPath, genomeBuild, transcriptDatabase); + if (parser.isEmpty()) { + LOGGER.warn("Cannot obtain parser for processing the VCF file {} with {} {} due to missing resources", + vcfPath.toAbsolutePath(), genomeBuild, transcriptDatabase); return List.of(); } - try (VariantParser variantParser = lirical.variantParserFactory().get().forPath(vcfPath)) { + try (VariantParser variantParser = parser.get()) { // Read variants LOGGER.info("Reading background variants from {}.", vcfPath.toAbsolutePath()); ProgressReporter progressReporter = new ProgressReporter(10_000, "variants"); diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java index 614d5864b..8d2ba213b 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java @@ -4,6 +4,8 @@ import org.monarchinitiative.lirical.core.analysis.AnalysisData; import org.monarchinitiative.lirical.core.analysis.LiricalParseException; import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; import org.monarchinitiative.lirical.io.analysis.*; import org.monarchinitiative.phenol.ontology.data.TermId; @@ -53,7 +55,9 @@ protected String getGenomeBuild() { } @Override - protected AnalysisData prepareAnalysisData(Lirical lirical) throws LiricalParseException { + protected AnalysisData prepareAnalysisData(Lirical lirical, + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDb) throws LiricalParseException { LOGGER.info("Reading phenopacket from {}.", phenopacketPath.toAbsolutePath()); PhenopacketData data = null; @@ -89,7 +93,7 @@ 
protected AnalysisData prepareAnalysisData(Lirical lirical) throws LiricalParseE if (vcfPath == null) { genes = GenesAndGenotypes.empty(); } else { - genes = readVariantsFromVcfFile(sampleId, vcfPath, lirical.variantParserFactory().orElse(null)); + genes = readVariantsFromVcfFile(sampleId, vcfPath, genomeBuild, transcriptDb, lirical.variantParserFactory()); } return AnalysisData.of(sampleId, data.getAge().orElse(null), diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java index 28655d8de..3e2b2019b 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java @@ -4,7 +4,9 @@ import org.monarchinitiative.lirical.core.analysis.AnalysisData; import org.monarchinitiative.lirical.core.analysis.LiricalParseException; import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; +import org.monarchinitiative.lirical.core.model.GenomeBuild; import org.monarchinitiative.lirical.core.model.Sex; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; import org.monarchinitiative.phenol.ontology.data.TermId; import picocli.CommandLine; @@ -58,7 +60,9 @@ protected String getGenomeBuild() { } @Override - protected AnalysisData prepareAnalysisData(Lirical lirical) throws LiricalParseException { + protected AnalysisData prepareAnalysisData(Lirical lirical, + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDb) throws LiricalParseException { HpoTermSanitizer sanitizer = new HpoTermSanitizer(lirical.phenotypeService().hpo()); List observedTerms; @@ -89,7 +93,7 @@ protected AnalysisData prepareAnalysisData(Lirical lirical) throws LiricalParseE if (vcfPath == null) { genes = GenesAndGenotypes.empty(); } else { - genes = 
readVariantsFromVcfFile(sampleId, vcfPath, lirical.variantParserFactory().orElse(null)); + genes = readVariantsFromVcfFile(sampleId, vcfPath, genomeBuild, transcriptDb, lirical.variantParserFactory()); } return AnalysisData.of(sampleId, parseAge(age), sex, observedTerms, negatedTerms, genes); diff --git a/lirical-configuration/pom.xml b/lirical-configuration/pom.xml index d3bea5a3b..f27dbe82d 100644 --- a/lirical-configuration/pom.xml +++ b/lirical-configuration/pom.xml @@ -23,4 +23,24 @@ ${project.parent.version} + + + + + src/main/resources + true + + + + + org.apache.maven.plugins + maven-resources-plugin + + + tsv + + + + + \ No newline at end of file diff --git a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/GenotypeLrProperties.java b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/GenotypeLrProperties.java index 9d750a3a6..e5f07e85e 100644 --- a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/GenotypeLrProperties.java +++ b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/GenotypeLrProperties.java @@ -1,4 +1,63 @@ package org.monarchinitiative.lirical.configuration; -public record GenotypeLrProperties(float pathogenicityThreshold, double defaultVariantFrequency, boolean strict) { +import java.util.Objects; + +/** + * @deprecated to be removed in 2.0.0 without replacement as a redundant class. 
+ */ +@Deprecated(forRemoval = true, since = "2.0.0-RC2") +public final class GenotypeLrProperties { + private final float pathogenicityThreshold; + private final double defaultVariantBackgroundFrequency; + private final boolean strict; + + public GenotypeLrProperties(float pathogenicityThreshold, double defaultVariantBackgroundFrequency, boolean strict) { + this.pathogenicityThreshold = pathogenicityThreshold; + this.defaultVariantBackgroundFrequency = defaultVariantBackgroundFrequency; + this.strict = strict; + } + + public float pathogenicityThreshold() { + return pathogenicityThreshold; + } + + /** + * @deprecated use {@link #defaultVariantBackgroundFrequency()} instead. + */ + @Deprecated(since = "2.0.0-RC2", forRemoval = true) + public double defaultVariantFrequency() { + return defaultVariantBackgroundFrequency; + } + + public double defaultVariantBackgroundFrequency() { + return defaultVariantBackgroundFrequency; + } + + public boolean strict() { + return strict; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (GenotypeLrProperties) obj; + return Float.floatToIntBits(this.pathogenicityThreshold) == Float.floatToIntBits(that.pathogenicityThreshold) && + Double.doubleToLongBits(this.defaultVariantBackgroundFrequency) == Double.doubleToLongBits(that.defaultVariantBackgroundFrequency) && + this.strict == that.strict; + } + + @Override + public int hashCode() { + return Objects.hash(pathogenicityThreshold, defaultVariantBackgroundFrequency, strict); + } + + @Override + public String toString() { + return "GenotypeLrProperties[" + + "pathogenicityThreshold=" + pathogenicityThreshold + ", " + + "defaultVariantBackgroundFrequency=" + defaultVariantBackgroundFrequency + ", " + + "strict=" + strict + ']'; + } + } diff --git a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java 
b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java index b605914f9..08af2d9bf 100644 --- a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java +++ b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java @@ -1,59 +1,47 @@ package org.monarchinitiative.lirical.configuration; -import de.charite.compbio.jannovar.data.JannovarData; -import de.charite.compbio.jannovar.data.JannovarDataSerializer; -import de.charite.compbio.jannovar.data.SerializationException; +import org.monarchinitiative.lirical.configuration.impl.BundledBackgroundVariantFrequencyServiceFactory; import org.monarchinitiative.lirical.core.Lirical; -import org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunner; -import org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunnerImpl; import org.monarchinitiative.lirical.core.analysis.probability.PretestDiseaseProbability; import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLikelihoodRatio; import org.monarchinitiative.lirical.core.likelihoodratio.PhenotypeLikelihoodRatio; import org.monarchinitiative.lirical.core.model.GenomeBuild; import org.monarchinitiative.lirical.core.output.AnalysisResultWriterFactory; import org.monarchinitiative.lirical.core.service.*; -import org.monarchinitiative.lirical.exomiser_db_adapter.ExomiserMvStoreMetadataService; -import org.monarchinitiative.lirical.io.GenotypeDataIngestor; +import org.monarchinitiative.lirical.exomiser_db_adapter.ExomiserMvStoreMetadataServiceFactory; import org.monarchinitiative.lirical.io.LiricalDataException; import org.monarchinitiative.lirical.io.LiricalDataResolver; import org.monarchinitiative.lirical.core.io.VariantParserFactory; -import org.monarchinitiative.lirical.io.service.JannovarFunctionalVariantAnnotator; +import org.monarchinitiative.lirical.io.service.JannovarFunctionalVariantAnnotatorService; import 
org.monarchinitiative.lirical.io.vcf.VcfVariantParserFactory; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaderOptions; import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenol.ontology.data.TermId; -import org.monarchinitiative.svart.assembly.GenomicAssembly; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; import java.io.IOException; -import java.nio.file.Files; +import java.io.InputStream; import java.nio.file.Path; import java.util.*; +import java.util.stream.Collectors; public class LiricalBuilder { private static final Logger LOGGER = LoggerFactory.getLogger(LiricalBuilder.class); + private static final Properties PROPERTIES = readProperties(); + private static final String LIRICAL_VERSION = PROPERTIES.getProperty("lirical.version", "UNKNOWN VERSION"); private final Path dataDirectory; private final LiricalDataResolver liricalDataResolver; - private final Set diseaseDatabases = new HashSet<>(Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.DECIPHER)); - private GenomeBuild genomeBuild = GenomeBuild.HG38; - private Path exomiserVariantDatabase = null; - private Path backgroundVariantFrequency = null; - private TranscriptDatabase transcriptDatabase = TranscriptDatabase.REFSEQ; - private float defaultVariantAlleleFrequency = VariantMetadataService.DEFAULT_FREQUENCY; - private GenotypeLrProperties genotypeLrProperties = new GenotypeLrProperties(.8f, .1, false); - private PhenotypeLikelihoodRatio phenotypeLikelihoodRatio = null; - private GenotypeLikelihoodRatio genotypeLikelihoodRatio = null; + private final Map exomiserVariantDatabasePaths = new HashMap<>(2); private PhenotypeService phenotypeService = null; + private 
BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory = null; - private VariantMetadataService variantMetadataService = null; - private FunctionalVariantAnnotator functionalVariantAnnotator = null; + private VariantMetadataServiceFactory variantMetadataServiceFactory = null; + private FunctionalVariantAnnotatorService functionalVariantAnnotatorService = null; public static LiricalBuilder builder(Path liricalDataDirectory) throws LiricalDataException { return new LiricalBuilder(liricalDataDirectory); @@ -64,31 +52,69 @@ private LiricalBuilder(Path dataDirectory) throws LiricalDataException { this.liricalDataResolver = LiricalDataResolver.of(dataDirectory); } + /** + * @deprecated use {@link #exomiserVariantDbPath(GenomeBuild, Path)} instead. + */ + @Deprecated(since = "2.0.0-RC2", forRemoval = true) public LiricalBuilder exomiserVariantDatabase(Path exomiserVariantDatabase) { - this.exomiserVariantDatabase = exomiserVariantDatabase; + LOGGER.warn("Setting path to Exomiser database has been deprecated. Use `exomiserVariantDbPath(GenomeBuild genomeBuild, Path exomiserVariantDatabase)` to set path to database for a genome build!"); return this; } - public LiricalBuilder genomeBuild(GenomeBuild genomeBuild) { + /** + * Set path to exomiser variant database for given {@link GenomeBuild}. + * @return the builder + */ + public LiricalBuilder exomiserVariantDbPath(GenomeBuild genomeBuild, Path exomiserVariantDatabase) { if (genomeBuild == null) { - LOGGER.warn("Cannot set genome build to null. 
Retaining {}", this.genomeBuild); + LOGGER.warn("Genome build must not be null: {}", exomiserVariantDatabase); return this; } - this.genomeBuild = genomeBuild; + /* A null path is rejected here because build() may call toAbsolutePath() on every registered path. */ + if (exomiserVariantDatabase == null) { + LOGGER.warn("Path to Exomiser variant database must not be null for genome build {}", genomeBuild); + return this; + } + this.exomiserVariantDatabasePaths.put(genomeBuild, exomiserVariantDatabase); + return this; + } + + /** + * Remove the Exomiser variant database path registered for the given {@link GenomeBuild}. + * A {@code null} genome build is ignored with a warning. + * @return the builder + */ + public LiricalBuilder clearExomiserVariantDatabaseForGenomeBuild(GenomeBuild genomeBuild) { + if (genomeBuild == null) { + LOGGER.warn("Cannot clear database for null genome build!"); + return this; + } + this.exomiserVariantDatabasePaths.remove(genomeBuild); + return this; + } + + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") + public LiricalBuilder genomeBuild(GenomeBuild genomeBuild) { + LOGGER.warn("Setting genome build has been deprecated. Set the desired genome build via AnalysisOptions!"); return this; } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder backgroundVariantFrequency(Path backgroundVariantFrequency) { - this.backgroundVariantFrequency = backgroundVariantFrequency; + LOGGER.warn("Setting path to background variant frequency has been deprecated! Set backgroundVariantFrequencyServiceFactory instead!"); + return this; + } + + /** + * Set the factory used to obtain background variant frequency services. + * If the factory is left unset, {@link #build()} falls back to the bundled background frequencies. + * @return the builder + */ + public LiricalBuilder backgroundVariantFrequencyServiceFactory(BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory) { + this.backgroundVariantFrequencyServiceFactory = backgroundVariantFrequencyServiceFactory; return this; } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). 
+ * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder transcriptDatabase(TranscriptDatabase transcriptDatabase) { - if (transcriptDatabase == null) { - LOGGER.warn("Cannot set transcript database to null"); - return this; - } - this.transcriptDatabase = transcriptDatabase; + LOGGER.warn("Setting transcript database has been deprecated. Set the desired database build via AnalysisOptions!"); return this; } @@ -96,18 +122,22 @@ public LiricalBuilder transcriptDatabase(TranscriptDatabase transcriptDatabase) * @param defaultVariantAlleleFrequency default variant allele frequency to set. * The frequency is only used if * {@link #variantMetadataService(VariantMetadataService)} is unset. + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder defaultVariantAlleleFrequency(float defaultVariantAlleleFrequency) { - this.defaultVariantAlleleFrequency = defaultVariantAlleleFrequency; + LOGGER.warn("Setting default variant allele frequency has been deprecated. Set the desired value via AnalysisOptions!"); return this; } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder genotypeLrProperties(GenotypeLrProperties genotypeLrProperties) { - if (genotypeLrProperties == null) { - LOGGER.warn("Cannot set genotype likelihood ratio properties to null"); - return this; - } - this.genotypeLrProperties = genotypeLrProperties; + LOGGER.warn("Setting genotype LR properties has been deprecated. 
Set the desired value via AnalysisOptions!"); return this; } @@ -116,41 +146,58 @@ public LiricalBuilder phenotypeService(PhenotypeService phenotypeService) { return this; } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder clearDiseaseDatabases() { - this.diseaseDatabases.clear(); + LOGGER.warn("Setting disease databases has been deprecated. Set the desired disease databases via AnalysisOptions"); return this; } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder addDiseaseDatabases(DiseaseDatabase... diseaseDatabases) { return addDiseaseDatabases(Arrays.asList(diseaseDatabases)); } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder addDiseaseDatabases(Collection diseaseDatabases) { - if (diseaseDatabases == null) { - LOGGER.warn("Disease databases should not be null!"); - return this; - } - this.diseaseDatabases.addAll(diseaseDatabases); + LOGGER.warn("Setting disease databases has been deprecated. Set the desired disease databases via AnalysisOptions"); return this; } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). 
+ * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder setDiseaseDatabases(Collection diseaseDatabases) { - if (diseaseDatabases == null) { - LOGGER.warn("Disease databases should not be null!"); - return this; - } - this.diseaseDatabases.clear(); - this.diseaseDatabases.addAll(diseaseDatabases); + LOGGER.warn("Setting disease databases has been deprecated. Set the desired disease databases via AnalysisOptions"); return this; } + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder phenotypeLikelihoodRatio(PhenotypeLikelihoodRatio phenotypeLikelihoodRatio) { - this.phenotypeLikelihoodRatio = phenotypeLikelihoodRatio; + LOGGER.warn("Setting phenotype LR has been deprecated. Set the desired value via AnalysisOptions!"); return this; } + /** + * @deprecated the option does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder genotypeLikelihoodRatio(GenotypeLikelihoodRatio genotypeLikelihoodRatio) { - this.genotypeLikelihoodRatio = genotypeLikelihoodRatio; + LOGGER.warn("Setting genotype LR has been deprecated. Set the desired value via AnalysisOptions!"); return this; } @@ -160,69 +207,82 @@ public LiricalBuilder genotypeLikelihoodRatio(GenotypeLikelihoodRatio genotypeLi */ @Deprecated(forRemoval = true, since = "2.0.0-SNAPSHOT") public LiricalBuilder pretestDiseaseProbability(PretestDiseaseProbability pretestDiseaseProbability) { + LOGGER.warn("Setting pretest disease probability has been deprecated. Set the desired value via AnalysisOptions!"); return this; } + /** + * @deprecated setting variant metadata service has been deprecated. + * Use {@link #variantMetadataServiceFactory(VariantMetadataServiceFactory)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") public LiricalBuilder variantMetadataService(VariantMetadataService variantMetadataService) { - this.variantMetadataService = variantMetadataService; + LOGGER.warn("Setting variant metadata service has been deprecated."); return this; } + public LiricalBuilder variantMetadataServiceFactory(VariantMetadataServiceFactory variantMetadataServiceFactory) { + this.variantMetadataServiceFactory = variantMetadataServiceFactory; + return this; + } + + /** + * @deprecated functional variant annotation configuration does not belong to the global configuration but to per-sample config (to be removed in v2.0.0). + * @return the builder + */ + @Deprecated(forRemoval = true, since = "2.0.0-SNAPSHOT") public LiricalBuilder functionalVariantAnnotator(FunctionalVariantAnnotator functionalVariantAnnotator) { - this.functionalVariantAnnotator = functionalVariantAnnotator; + LOGGER.warn("Setting functional variant annotator has been deprecated."); return this; } public Lirical build() throws LiricalDataException { // First, services if (phenotypeService == null) { - HpoDiseaseLoaderOptions diseaseLoaderOptions = HpoDiseaseLoaderOptions.of(diseaseDatabases, true, HpoDiseaseLoaderOptions.DEFAULT_COHORT_SIZE); + HpoDiseaseLoaderOptions diseaseLoaderOptions = HpoDiseaseLoaderOptions.of(DiseaseDatabase.allKnownDiseaseDatabases(), true, HpoDiseaseLoaderOptions.DEFAULT_COHORT_SIZE); phenotypeService = configurePhenotypeService(dataDirectory, diseaseLoaderOptions); } - if (functionalVariantAnnotator == null) { - LOGGER.debug("Functional variant annotator is unset. 
Loading Jannovar transcript database for {} transcripts.", transcriptDatabase); - JannovarData jannovarData = loadJannovarData(liricalDataResolver, genomeBuild, transcriptDatabase); - functionalVariantAnnotator = JannovarFunctionalVariantAnnotator.of(jannovarData, phenotypeService.associationData().getGeneIdentifiers()); + if (backgroundVariantFrequencyServiceFactory == null) { + LOGGER.debug("Using bundled variant background frequencies"); + backgroundVariantFrequencyServiceFactory = BundledBackgroundVariantFrequencyServiceFactory.getInstance(); + } + + if (functionalVariantAnnotatorService == null) { + LOGGER.debug("Functional variant annotator service is unset. Creating the service using resources in {}.", liricalDataResolver.dataDirectory().toAbsolutePath()); + functionalVariantAnnotatorService = JannovarFunctionalVariantAnnotatorService.of(liricalDataResolver, phenotypeService.associationData().getGeneIdentifiers()); } // VariantMetadataService and VariantParserFactory. - GenomicAssembly genomicAssembly = LoadUtils.parseSvartGenomicAssembly(genomeBuild); VariantParserFactory variantParserFactory; - if (variantMetadataService == null) { + if (this.variantMetadataServiceFactory == null) { LOGGER.debug("Variant metadata service is unset."); - if (exomiserVariantDatabase == null) { + if (exomiserVariantDatabasePaths.isEmpty()) { LOGGER.debug("Path to Exomiser database is unset. 
Variants will not be annotated."); - variantMetadataService = NoOpVariantMetadataService.instance(); - variantParserFactory = null; + this.variantMetadataServiceFactory = VariantMetadataServiceFactory.noOpFactory(); + variantParserFactory = VariantParserFactory.noOpFactory(); } else { - LOGGER.debug("Using Exomiser variant database at {}", exomiserVariantDatabase.toAbsolutePath()); - variantMetadataService = ExomiserMvStoreMetadataService.of(exomiserVariantDatabase, new VariantMetadataService.Options(defaultVariantAlleleFrequency)); - variantParserFactory = VcfVariantParserFactory.of(genomicAssembly, functionalVariantAnnotator, variantMetadataService); + String summary = exomiserVariantDatabasePaths.entrySet().stream() + .map(e -> "%s -> %s".formatted(e.getKey(), e.getValue().toAbsolutePath())) + .collect(Collectors.joining(", ", "{", "}")); + LOGGER.debug("Using Exomiser variant databases: {}", summary); + this.variantMetadataServiceFactory = ExomiserMvStoreMetadataServiceFactory.of(exomiserVariantDatabasePaths); + variantParserFactory = VcfVariantParserFactory.of(functionalVariantAnnotatorService, variantMetadataServiceFactory); } } else { - variantParserFactory = VcfVariantParserFactory.of(genomicAssembly, functionalVariantAnnotator, variantMetadataService); + variantParserFactory = VcfVariantParserFactory.of(functionalVariantAnnotatorService, variantMetadataServiceFactory); } - // Lirical analysis runner - if (phenotypeLikelihoodRatio == null) { - phenotypeLikelihoodRatio = new PhenotypeLikelihoodRatio(phenotypeService.hpo(), phenotypeService.diseases()); - } - - if (genotypeLikelihoodRatio == null) - genotypeLikelihoodRatio = configureGenotypeLikelihoodRatio(backgroundVariantFrequency, genomeBuild, genotypeLrProperties); - - LiricalAnalysisRunner analyzer = LiricalAnalysisRunnerImpl.of(phenotypeService, phenotypeLikelihoodRatio, genotypeLikelihoodRatio); - // Analysis result writer factory AnalysisResultWriterFactory analysisResultWriterFactory = new 
AnalysisResultWriterFactoryImpl(phenotypeService.hpo(), phenotypeService.diseases()); - return Lirical.of(variantParserFactory, + return Lirical.of( + variantParserFactory, phenotypeService, - functionalVariantAnnotator, - variantMetadataService, - analyzer, - analysisResultWriterFactory); + backgroundVariantFrequencyServiceFactory, + variantMetadataServiceFactory, + analysisResultWriterFactory, + LIRICAL_VERSION); } private static PhenotypeService configurePhenotypeService(Path dataDirectory, HpoDiseaseLoaderOptions options) throws LiricalDataException { @@ -237,31 +297,15 @@ private static PhenotypeService configurePhenotypeService(Path dataDirectory, Hp return PhenotypeService.of(hpo, diseases, associationData); } - private static JannovarData loadJannovarData(LiricalDataResolver liricalDataResolver, - GenomeBuild genomeBuild, - TranscriptDatabase transcriptDatabase) throws LiricalDataException { - Path txDatabasePath = liricalDataResolver.transcriptCacheFor(genomeBuild, transcriptDatabase); - LOGGER.info("Loading transcript database from {}", txDatabasePath.toAbsolutePath()); - try { - return new JannovarDataSerializer(txDatabasePath.toAbsolutePath().toString()).load(); - } catch (SerializationException e) { - throw new LiricalDataException(e); - } - } - private static GenotypeLikelihoodRatio configureGenotypeLikelihoodRatio(Path backgroundVariantFrequency, GenomeBuild genomeBuild, GenotypeLrProperties genotypeLrProperties) throws LiricalDataException { - BackgroundVariantFrequencyService backgroundVariantFrequencyService; - try (BufferedReader br = backgroundVariantFrequency == null - ? 
LoadUtils.openBundledBackgroundFrequencyFile(genomeBuild) - : Files.newBufferedReader(backgroundVariantFrequency)) { - Map frequencyMap = GenotypeDataIngestor.parse(br); - backgroundVariantFrequencyService = BackgroundVariantFrequencyService.of(frequencyMap, genotypeLrProperties.defaultVariantFrequency()); + /** + * Read {@code /lirical.properties} from the classpath. + * Loading is best-effort: a missing or unreadable resource is logged and an empty {@link Properties} is returned. + * + * @return the loaded properties, possibly empty + */ + private static Properties readProperties() { + Properties properties = new Properties(); + + try (InputStream is = LiricalBuilder.class.getResourceAsStream("/lirical.properties")) { + if (is == null) { + LOGGER.warn("Error loading properties: resource /lirical.properties was not found"); + } else { + properties.load(is); + } } catch (IOException e) { - throw new LiricalDataException(e); + LOGGER.warn("Error loading properties: {}", e.getMessage(), e); } - GenotypeLikelihoodRatio.Options options = new GenotypeLikelihoodRatio.Options(genotypeLrProperties.pathogenicityThreshold(), genotypeLrProperties.strict()); - return new GenotypeLikelihoodRatio(backgroundVariantFrequencyService, options); + return properties; } - - } diff --git a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java index 3418002f0..256082d75 100644 --- a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java +++ b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java @@ -1,6 +1,5 @@ package org.monarchinitiative.lirical.configuration; -import org.monarchinitiative.lirical.core.model.GenomeBuild; import org.monarchinitiative.lirical.io.LiricalDataException; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoader; @@ -9,15 +8,10 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.svart.assembly.GenomicAssemblies; -import 
org.monarchinitiative.svart.assembly.GenomicAssembly; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.nio.file.Path; class LoadUtils { @@ -49,26 +43,4 @@ static HpoDiseases loadHpoDiseases(Path annotationPath, } } - static BufferedReader openBundledBackgroundFrequencyFile(GenomeBuild genomeBuild) throws LiricalDataException { - String name = switch (genomeBuild) { - case HG19 -> "/background/background-hg19.tsv"; - case HG38 -> "/background/background-hg38.tsv"; - }; - InputStream is = LiricalBuilder.class.getResourceAsStream(name); - if (is == null) - throw new LiricalDataException("Background file for " + genomeBuild + " is not present at '" + name + '\''); - LOGGER.debug("Loading bundled background variant frequencies from {}", name); - return new BufferedReader(new InputStreamReader(is)); - } - - static GenomicAssembly parseSvartGenomicAssembly(GenomeBuild genomeAssembly) { - switch (genomeAssembly) { - case HG19: - return GenomicAssemblies.GRCh37p13(); - default: - LOGGER.warn("Unknown genome assembly {}. 
Falling back to GRCh38", genomeAssembly); - case HG38: - return GenomicAssemblies.GRCh38p13(); - } - } } diff --git a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/impl/BundledBackgroundVariantFrequencyServiceFactory.java b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/impl/BundledBackgroundVariantFrequencyServiceFactory.java new file mode 100644 index 000000000..592b6d2f3 --- /dev/null +++ b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/impl/BundledBackgroundVariantFrequencyServiceFactory.java @@ -0,0 +1,54 @@ +package org.monarchinitiative.lirical.configuration.impl; + +import org.monarchinitiative.lirical.core.exception.LiricalRuntimeException; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.service.BackgroundVariantFrequencyService; +import org.monarchinitiative.lirical.core.service.BackgroundVariantFrequencyServiceFactory; +import org.monarchinitiative.lirical.io.background.BackgroundVariantFrequencyParser; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Map; +import java.util.Optional; + +public class BundledBackgroundVariantFrequencyServiceFactory implements BackgroundVariantFrequencyServiceFactory { + + private static final Logger LOGGER = LoggerFactory.getLogger(BundledBackgroundVariantFrequencyServiceFactory.class); + + private static final BundledBackgroundVariantFrequencyServiceFactory INSTANCE = new BundledBackgroundVariantFrequencyServiceFactory(); + + public static BundledBackgroundVariantFrequencyServiceFactory getInstance() { + return INSTANCE; + } + + private BundledBackgroundVariantFrequencyServiceFactory() { + } + + @Override + public Optional forGenomeBuild(GenomeBuild 
genomeBuild, double defaultVariantBackgroundFrequency) { + try (BufferedReader br = openBundledBackgroundFrequencyFile(genomeBuild)) { + Map frequencyMap = BackgroundVariantFrequencyParser.parse(br); + return Optional.of(BackgroundVariantFrequencyService.of(frequencyMap, defaultVariantBackgroundFrequency)); + } catch (IOException e) { + LOGGER.warn("Cannot configure background variant frequency service for {}: {}", genomeBuild, e.getMessage(), e); + return Optional.empty(); + } + } + + private static BufferedReader openBundledBackgroundFrequencyFile(GenomeBuild genomeBuild) { + String name = switch (genomeBuild) { + case HG19 -> "/background/background-hg19.tsv"; + case HG38 -> "/background/background-hg38.tsv"; + }; + InputStream is = BundledBackgroundVariantFrequencyServiceFactory.class.getResourceAsStream(name); + if (is == null) + throw new LiricalRuntimeException("Background file for " + genomeBuild + " is not present at '" + name + '\''); + LOGGER.debug("Loading bundled background variant frequencies from {}", name); + return new BufferedReader(new InputStreamReader(is)); + } +} diff --git a/lirical-cli/src/main/resources/lirical.properties b/lirical-configuration/src/main/resources/lirical.properties similarity index 100% rename from lirical-cli/src/main/resources/lirical.properties rename to lirical-configuration/src/main/resources/lirical.properties diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java index 66d516519..267c29478 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java @@ -1,11 +1,11 @@ package org.monarchinitiative.lirical.core; import org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunner; +import org.monarchinitiative.lirical.core.analysis.impl.LiricalAnalysisRunnerImpl; +import 
org.monarchinitiative.lirical.core.exception.LiricalRuntimeException; import org.monarchinitiative.lirical.core.io.VariantParserFactory; import org.monarchinitiative.lirical.core.output.AnalysisResultWriterFactory; -import org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotator; -import org.monarchinitiative.lirical.core.service.PhenotypeService; -import org.monarchinitiative.lirical.core.service.VariantMetadataService; +import org.monarchinitiative.lirical.core.service.*; import java.util.Objects; import java.util.Optional; @@ -14,56 +14,97 @@ public class Lirical { private final VariantParserFactory variantParserFactory; private final PhenotypeService phenotypeService; - private final FunctionalVariantAnnotator functionalVariantAnnotator; - private final VariantMetadataService variantMetadataService; + private final VariantMetadataServiceFactory variantMetadataServiceFactory; private final LiricalAnalysisRunner analysisRunner; private final AnalysisResultWriterFactory analysisResultWriterFactory; + private final String version; // nullable + /** + * @deprecated use {@link #of(VariantParserFactory, PhenotypeService, BackgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory, AnalysisResultWriterFactory, String)} + * instead; this overload always throws {@link LiricalRuntimeException}. + */ + @Deprecated(since = "2.0.0-RC2", forRemoval = true) public static Lirical of(VariantParserFactory variantParserFactory, PhenotypeService phenotypeService, FunctionalVariantAnnotator functionalVariantAnnotator, VariantMetadataService variantMetadataService, LiricalAnalysisRunner analysisRunner, AnalysisResultWriterFactory analysisResultWriterFactory) { + throw new LiricalRuntimeException("Sorry, this static constructor has been deprecated!"); + } + + /** + * @deprecated use {@link #of(VariantParserFactory, PhenotypeService, BackgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory, AnalysisResultWriterFactory, String)} instead. 
+ */ + @Deprecated(since = "2.0.0-RC2", forRemoval = true) + public static Lirical of(VariantParserFactory variantParserFactory, + PhenotypeService phenotypeService, + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, + VariantMetadataServiceFactory variantMetadataService, + AnalysisResultWriterFactory analysisResultWriterFactory) { + return of(variantParserFactory, + phenotypeService, + backgroundVariantFrequencyServiceFactory, + variantMetadataService, + analysisResultWriterFactory, + null); + } + + public static Lirical of(VariantParserFactory variantParserFactory, + PhenotypeService phenotypeService, + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, + VariantMetadataServiceFactory variantMetadataService, + AnalysisResultWriterFactory analysisResultWriterFactory, + String version) { return new Lirical(variantParserFactory, phenotypeService, - functionalVariantAnnotator, + backgroundVariantFrequencyServiceFactory, variantMetadataService, - analysisRunner, - analysisResultWriterFactory); + analysisResultWriterFactory, + version); } private Lirical(VariantParserFactory variantParserFactory, PhenotypeService phenotypeService, - FunctionalVariantAnnotator functionalVariantAnnotator, - VariantMetadataService variantMetadataService, - LiricalAnalysisRunner analysisRunner, - AnalysisResultWriterFactory analysisResultWriterFactory) { - this.variantParserFactory = variantParserFactory; // nullable + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, + VariantMetadataServiceFactory variantMetadataServiceFactory, + AnalysisResultWriterFactory analysisResultWriterFactory, + String version) { + this.variantParserFactory = Objects.requireNonNull(variantParserFactory); this.phenotypeService = Objects.requireNonNull(phenotypeService); - this.functionalVariantAnnotator = Objects.requireNonNull(functionalVariantAnnotator); - this.variantMetadataService = 
Objects.requireNonNull(variantMetadataService); - this.analysisRunner = Objects.requireNonNull(analysisRunner); + this.variantMetadataServiceFactory = Objects.requireNonNull(variantMetadataServiceFactory); + this.version = version; // nullable + this.analysisRunner = LiricalAnalysisRunnerImpl.of(phenotypeService, backgroundVariantFrequencyServiceFactory); this.analysisResultWriterFactory = Objects.requireNonNull(analysisResultWriterFactory); } /** - * @return variant parser factory if Exomiser variant database is present. Otherwise, an empty optional is returned. + * @return variant parser factory for parsing variants for LIRICAL analysis. */ - public Optional variantParserFactory() { - return Optional.ofNullable(variantParserFactory); + public VariantParserFactory variantParserFactory() { + return variantParserFactory; } public PhenotypeService phenotypeService() { return phenotypeService; } + @Deprecated(since = "2.0.0-RC2", forRemoval = true) public FunctionalVariantAnnotator functionalVariantAnnotator() { - return functionalVariantAnnotator; + return null; } + /** + * + * @deprecated use {@link #variantMetadataServiceFactory()} instead + */ + @Deprecated(since = "2.0.0-RC2", forRemoval = true) public VariantMetadataService variantMetadataService() { - return variantMetadataService; + return null; + } + + public VariantMetadataServiceFactory variantMetadataServiceFactory() { + return variantMetadataServiceFactory; } public LiricalAnalysisRunner analysisRunner() { @@ -73,4 +114,8 @@ public LiricalAnalysisRunner analysisRunner() { public AnalysisResultWriterFactory analysisResultsWriterFactory() { return analysisResultWriterFactory; } + + public Optional version() { + return Optional.ofNullable(version); + } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java index f7a423bcf..812b2da0a 100644 --- 
a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java @@ -1,8 +1,14 @@ package org.monarchinitiative.lirical.core.analysis; import org.monarchinitiative.lirical.core.analysis.probability.PretestDiseaseProbability; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; +import org.monarchinitiative.lirical.core.service.VariantMetadataService; +import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.Objects; +import java.util.*; /** * A container for analysis-specific settings, i.e. settings that need to be changed for analysis of each sample. @@ -10,7 +16,7 @@ public interface AnalysisOptions { /** - * @deprecated to be removed in 2.0.0, use {@link #of(boolean, PretestDiseaseProbability, boolean, float)} instead. + * @deprecated to be removed in 2.0.0, use {@link #builder()} instead. */ @Deprecated(forRemoval = true) static AnalysisOptions of(boolean useGlobal, PretestDiseaseProbability pretestDiseaseProbability) { @@ -18,24 +24,83 @@ static AnalysisOptions of(boolean useGlobal, PretestDiseaseProbability pretestDi } /** - * @deprecated to be removed in 2.0.0, use {@link #of(boolean, PretestDiseaseProbability, boolean, float)} instead. + * @deprecated to be removed in 2.0.0, use {@link #builder()} instead. 
*/ @Deprecated(forRemoval = true) static AnalysisOptions of(boolean useGlobal, PretestDiseaseProbability pretestDiseaseProbability, boolean disregardDiseaseWithNoDeleteriousVariants) { Objects.requireNonNull(pretestDiseaseProbability); - return new AnalysisOptionsDefault(useGlobal, pretestDiseaseProbability, disregardDiseaseWithNoDeleteriousVariants, .8f); + return of(useGlobal, pretestDiseaseProbability, disregardDiseaseWithNoDeleteriousVariants, .8f); } + /** + * @deprecated to be removed in 2.0.0, use the {@link #builder()} instead. + */ + @Deprecated(forRemoval = true) static AnalysisOptions of(boolean useGlobal, PretestDiseaseProbability pretestDiseaseProbability, boolean disregardDiseaseWithNoDeleteriousVariants, float pathogenicityThreshold) { Objects.requireNonNull(pretestDiseaseProbability); - return new AnalysisOptionsDefault(useGlobal, pretestDiseaseProbability, disregardDiseaseWithNoDeleteriousVariants, pathogenicityThreshold); + Objects.requireNonNull(pretestDiseaseProbability); + return new AnalysisOptionsDefault(GenomeBuild.HG38, + TranscriptDatabase.REFSEQ, + Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.DECIPHER), + pathogenicityThreshold, + .1, + false, + useGlobal, + pretestDiseaseProbability, + disregardDiseaseWithNoDeleteriousVariants); } + static Builder builder() { + return new Builder(); + } + + /** + * @return genomic build that should be used in this analysis. + */ + GenomeBuild genomeBuild(); + + /** + * @return the transcript database that should be used in this analysis. + */ + TranscriptDatabase transcriptDatabase(); + + /** + * @return evaluate the patient wrt. diseases from given source(s). + */ + Set diseaseDatabases(); + + /** + * @return a variant frequency to assume for the variants with no available frequency data. + * @deprecated the parameter has been deprecated in favor of a constant in {@link VariantMetadataService#DEFAULT_FREQUENCY}. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") + default float defaultVariantAlleleFrequency() { + return Float.NaN; + } + + /** + * @return threshold for determining if the variant is deleterious or not. + * The threshold must be in the range [0,1]. + */ + float variantDeleteriousnessThreshold(); + + /** + * @return default frequency of called-pathogenic variants in the general population to use if, for whatever reason, + * data was not available in gnomAD. + */ + double defaultVariantBackgroundFrequency(); + + /** + * @return true if strict penalties should be used if the genotype does not match the disease model + * in terms of number of called pathogenic alleles. + */ + boolean useStrictPenalties(); + /** * @return true if the global analysis mode should be used. */ @@ -58,7 +123,121 @@ static AnalysisOptions of(boolean useGlobal, * Variant with pathogenicity value greater or equal to this threshold is considered deleterious. * * @return variant pathogenicity threshold value. + * @deprecated use {@link #variantDeleteriousnessThreshold()} instead. + */ + @Deprecated(since = "2.0.0-RC2", forRemoval = true) + default float pathogenicityThreshold() { + return variantDeleteriousnessThreshold(); + } + + /** + * A builder for {@link AnalysisOptions}. 
*/ - float pathogenicityThreshold(); + class Builder { + + private static final Logger LOGGER = LoggerFactory.getLogger(Builder.class); + + private GenomeBuild genomeBuild = GenomeBuild.HG38; + private TranscriptDatabase transcriptDatabase = TranscriptDatabase.REFSEQ; + private final Set diseaseDatabases = new HashSet<>(List.of(DiseaseDatabase.OMIM, DiseaseDatabase.DECIPHER)); + private float variantDeleteriousnessThreshold = .8f; + private double defaultVariantBackgroundFrequency = .1; + private boolean useStrictPenalties = false; + private boolean useGlobal = false; + private PretestDiseaseProbability pretestDiseaseProbability = null; + private boolean disregardDiseaseWithNoDeleteriousVariants = true; + + private Builder() { + } + + public Builder genomeBuild(GenomeBuild genomeBuild) { + if (genomeBuild == null) { + LOGGER.warn("Cannot set genome build to `null`. Retaining {}", this.genomeBuild); + return this; + } + this.genomeBuild = genomeBuild; + return this; + } + + public Builder transcriptDatabase(TranscriptDatabase transcriptDatabase) { + if (transcriptDatabase == null) { + LOGGER.warn("Cannot set transcript database to `null`. Retaining {}", this.transcriptDatabase); + return this; + } + this.transcriptDatabase = transcriptDatabase; + return this; + } + + public Builder clearDiseaseDatabases() { + this.diseaseDatabases.clear(); + return this; + } + + public Builder addDiseaseDatabases(DiseaseDatabase... 
diseaseDatabases) { + return addDiseaseDatabases(Arrays.asList(diseaseDatabases)); + } + + public Builder addDiseaseDatabases(Collection diseaseDatabases) { + if (diseaseDatabases == null) { + LOGGER.warn("Disease databases should not be `null`!"); + return this; + } + this.diseaseDatabases.addAll(diseaseDatabases); + return this; + } + + public Builder setDiseaseDatabases(Collection diseaseDatabases) { + if (diseaseDatabases == null) { + LOGGER.warn("Disease databases must not be `null`!"); + return this; + } + this.diseaseDatabases.clear(); + this.diseaseDatabases.addAll(diseaseDatabases); + return this; + } + + public Builder variantDeleteriousnessThreshold(float variantDeleteriousnessThreshold) { + this.variantDeleteriousnessThreshold = variantDeleteriousnessThreshold; + return this; + } + + public Builder defaultVariantBackgroundFrequency(double defaultVariantBackgroundFrequency) { + this.defaultVariantBackgroundFrequency = defaultVariantBackgroundFrequency; + return this; + } + + public Builder useStrictPenalties(boolean useStrictPenalties) { + this.useStrictPenalties = useStrictPenalties; + return this; + } + + public Builder useGlobal(boolean useGlobal) { + this.useGlobal = useGlobal; + return this; + } + + public Builder pretestProbability(PretestDiseaseProbability pretestDiseaseProbability) { + this.pretestDiseaseProbability = pretestDiseaseProbability; + return this; + } + + + public Builder disregardDiseaseWithNoDeleteriousVariants(boolean disregardDiseaseWithNoDeleteriousVariants) { + this.disregardDiseaseWithNoDeleteriousVariants = disregardDiseaseWithNoDeleteriousVariants; + return this; + } + + public AnalysisOptions build() { + return new AnalysisOptionsDefault(genomeBuild, + transcriptDatabase, + diseaseDatabases, + variantDeleteriousnessThreshold, + defaultVariantBackgroundFrequency, + useStrictPenalties, + useGlobal, + pretestDiseaseProbability, + disregardDiseaseWithNoDeleteriousVariants); + } + } } diff --git 
a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java index e8b6f648b..24f44e308 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java @@ -1,12 +1,21 @@ package org.monarchinitiative.lirical.core.analysis; import org.monarchinitiative.lirical.core.analysis.probability.PretestDiseaseProbability; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; +import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; + +import java.util.Set; record AnalysisOptionsDefault( + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDatabase, + Set diseaseDatabases, + float variantDeleteriousnessThreshold, + double defaultVariantBackgroundFrequency, + boolean useStrictPenalties, boolean useGlobal, PretestDiseaseProbability pretestDiseaseProbability, - boolean disregardDiseaseWithNoDeleteriousVariants, - float pathogenicityThreshold + boolean disregardDiseaseWithNoDeleteriousVariants ) implements AnalysisOptions { - } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java index fda1ecc9f..acba94f4a 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java @@ -1,7 +1,25 @@ package org.monarchinitiative.lirical.core.analysis; +import org.monarchinitiative.lirical.core.exception.LiricalAnalysisException; + +/** + * The analysis runner runs LIRICAL analysis on provided analysis 
subject ({@link AnalysisData}). The analysis + * is parametrized by {@link AnalysisOptions}. The runner throws {@link LiricalAnalysisException} if the analysis + * cannot be run as dictated by the options. + */ public interface LiricalAnalysisRunner { - AnalysisResults run(AnalysisData analysisData, AnalysisOptions analysisOptions); + /** + * Run analysis parametrized by {@code analysisOptions} on {@code analysisData}. + * + * @param analysisData data representing the analysis subject. + * @param analysisOptions analysis parameters. + * @return a container with results for each evaluated disease + * @throws LiricalAnalysisException if the analysis cannot be run, e.g. due to missing resource, + * such as {@linkplain org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotator} for a combination + * of {@linkplain org.monarchinitiative.lirical.core.model.GenomeBuild} + * and {@linkplain org.monarchinitiative.lirical.core.model.TranscriptDatabase} + */ + AnalysisResults run(AnalysisData analysisData, AnalysisOptions analysisOptions) throws LiricalAnalysisException; } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunnerImpl.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java similarity index 70% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunnerImpl.java rename to lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java index 5211ac440..1282df68e 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunnerImpl.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java @@ -1,10 +1,15 @@ -package org.monarchinitiative.lirical.core.analysis; +package org.monarchinitiative.lirical.core.analysis.impl; +import 
org.monarchinitiative.lirical.core.analysis.*; +import org.monarchinitiative.lirical.core.exception.LiricalAnalysisException; import org.monarchinitiative.lirical.core.likelihoodratio.*; import org.monarchinitiative.lirical.core.model.Gene2Genotype; import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.service.BackgroundVariantFrequencyServiceFactory; import org.monarchinitiative.lirical.core.service.PhenotypeService; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; +import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -19,36 +24,46 @@ public class LiricalAnalysisRunnerImpl implements LiricalAnalysisRunner { private static final Logger LOGGER = LoggerFactory.getLogger(LiricalAnalysisRunnerImpl.class); private final PhenotypeService phenotypeService; + private final BackgroundVariantFrequencyServiceFactory bgFreqFactory; private final PhenotypeLikelihoodRatio phenotypeLrEvaluator; - private final GenotypeLikelihoodRatio genotypeLikelihoodRatio; private final ForkJoinPool pool; public static LiricalAnalysisRunnerImpl of(PhenotypeService phenotypeService, - PhenotypeLikelihoodRatio phenotypeLrEvaluator, - GenotypeLikelihoodRatio genotypeLikelihoodRatio) { - return new LiricalAnalysisRunnerImpl(phenotypeService, phenotypeLrEvaluator, genotypeLikelihoodRatio); + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory) { + return new LiricalAnalysisRunnerImpl(phenotypeService, backgroundVariantFrequencyServiceFactory); } private LiricalAnalysisRunnerImpl(PhenotypeService phenotypeService, - PhenotypeLikelihoodRatio phenotypeLrEvaluator, - GenotypeLikelihoodRatio genotypeLikelihoodRatio) { + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory) 
{ this.phenotypeService = Objects.requireNonNull(phenotypeService); - this.phenotypeLrEvaluator = Objects.requireNonNull(phenotypeLrEvaluator); - this.genotypeLikelihoodRatio = Objects.requireNonNull(genotypeLikelihoodRatio); + this.phenotypeLrEvaluator = new PhenotypeLikelihoodRatio(phenotypeService.hpo(), phenotypeService.diseases()); + this.bgFreqFactory = backgroundVariantFrequencyServiceFactory; + // TODO - set parallelism int parallelism = Runtime.getRuntime().availableProcessors(); LOGGER.debug("Creating LIRICAL pool with {} workers.", parallelism); this.pool = new ForkJoinPool(parallelism, LiricalWorkerThread::new, null, false); } @Override - public AnalysisResults run(AnalysisData data, AnalysisOptions options) { + public AnalysisResults run(AnalysisData data, AnalysisOptions options) throws LiricalAnalysisException { + Collection diseaseDatabasePrefixes = options.diseaseDatabases().stream() + .map(DiseaseDatabase::prefix) + .toList(); Map> diseaseToGenotype = groupDiseasesByGene(data.genes()); + Optional genotypeLikelihoodRatio = configureGenotypeLikelihoodRatio(options.genomeBuild(), + options.variantDeleteriousnessThreshold(), + options.defaultVariantBackgroundFrequency(), + options.useStrictPenalties()); + if (genotypeLikelihoodRatio.isEmpty()) + throw new LiricalAnalysisException("Cannot configure genotype LR for %s".formatted(options.genomeBuild())); + ProgressReporter progressReporter = new ProgressReporter(1_000, "diseases"); Stream testResultStream = phenotypeService.diseases().hpoDiseases() .parallel() // why not? 
+ .filter(d -> diseaseDatabasePrefixes.contains(d.id().getPrefix())) .peek(d -> progressReporter.log()) - .map(disease -> analyzeDisease(disease, data, options, diseaseToGenotype)) + .map(disease -> analyzeDisease(genotypeLikelihoodRatio.get(), disease, data, options, diseaseToGenotype)) .flatMap(Optional::stream); try { @@ -76,7 +91,8 @@ private Map> groupDiseasesByGene(GenesAndGenotypes g return diseaseToGenotype; } - private Optional analyzeDisease(HpoDisease disease, + private Optional analyzeDisease(GenotypeLikelihoodRatio genotypeLikelihoodRatio, + HpoDisease disease, AnalysisData analysisData, AnalysisOptions options, Map> diseaseToGenotype) { @@ -105,7 +121,7 @@ private Optional analyzeDisease(HpoDisease disease, if (options.disregardDiseaseWithNoDeleteriousVariants()) { // has at least one pathogenic clinvar variant or predicted pathogenic variant? if (g2g.pathogenicClinVarCount(analysisData.sampleId()) > 0 - || g2g.pathogenicAlleleCount(analysisData.sampleId(), options.pathogenicityThreshold()) > 0) { + || g2g.pathogenicAlleleCount(analysisData.sampleId(), options.variantDeleteriousnessThreshold()) > 0) { noPredictedDeleteriousVariantsWereFound = false; } } @@ -154,4 +170,16 @@ private static GenotypeLrWithExplanation takeNonNullOrGreaterLr(GenotypeLrWithEx ? 
base : candidate; } + + private Optional configureGenotypeLikelihoodRatio(GenomeBuild genomeBuild, + float deleteriousnessThreshold, + double defaultVariantBackgroundFrequency, + boolean strict) { + return bgFreqFactory.forGenomeBuild(genomeBuild, defaultVariantBackgroundFrequency) + .map(bgFreqService -> { + GenotypeLikelihoodRatio.Options options = new GenotypeLikelihoodRatio.Options(deleteriousnessThreshold, strict); + return new GenotypeLikelihoodRatio(bgFreqService, options); + }); + } + } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalWorkerThread.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalWorkerThread.java similarity index 91% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalWorkerThread.java rename to lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalWorkerThread.java index 1f4c6c369..c419342b8 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalWorkerThread.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalWorkerThread.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.core.analysis; +package org.monarchinitiative.lirical.core.analysis.impl; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ForkJoinWorkerThread; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java new file mode 100644 index 000000000..0d8c4c51f --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java @@ -0,0 +1,31 @@ +package org.monarchinitiative.lirical.core.exception; + +/** + * An exception thrown by {@link org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunner} if the 
analysis + * cannot be run. + */ +public class LiricalAnalysisException extends LiricalException { + public LiricalAnalysisException() { + super(); + } + + public LiricalAnalysisException(String message) { + super(message); + } + + public LiricalAnalysisException(String message, Exception e) { + super(message, e); + } + + public LiricalAnalysisException(String message, Throwable cause) { + super(message, cause); + } + + public LiricalAnalysisException(Throwable cause) { + super(cause); + } + + protected LiricalAnalysisException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/NoOpVariantParserFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/NoOpVariantParserFactory.java new file mode 100644 index 000000000..e2cffe38f --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/NoOpVariantParserFactory.java @@ -0,0 +1,25 @@ +package org.monarchinitiative.lirical.core.io; + +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; + +import java.nio.file.Path; +import java.util.Optional; + +class NoOpVariantParserFactory implements VariantParserFactory { + + private static final NoOpVariantParserFactory INSTANCE = new NoOpVariantParserFactory(); + + static NoOpVariantParserFactory getInstance() { + return INSTANCE; + } + + private NoOpVariantParserFactory() { + } + + @Override + public Optional forPath(Path variantResource, GenomeBuild genomeBuild, TranscriptDatabase transcriptDatabase) { + return Optional.empty(); + } + +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/VariantParserFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/VariantParserFactory.java index dbccd2858..ce8f2430d 100644 --- 
a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/VariantParserFactory.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/VariantParserFactory.java @@ -1,18 +1,27 @@ package org.monarchinitiative.lirical.core.io; import org.monarchinitiative.lirical.core.model.GenomeBuild; -import org.monarchinitiative.svart.assembly.GenomicAssembly; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; import java.nio.file.Path; +import java.util.Optional; public interface VariantParserFactory { - GenomicAssembly genomicAssembly(); + /** + * Get a factory that can be used when LIRICAL seems to be configured for phenotype only analyses. + * + * @return a {@link VariantParserFactory} that never provides a {@link VariantParser}. + */ + static VariantParserFactory noOpFactory() { + return NoOpVariantParserFactory.getInstance(); + } + + Optional forPath(Path variantResource, GenomeBuild genomeBuild, TranscriptDatabase transcriptDatabase); - default GenomeBuild genomeBuild() { - return GenomeBuild.parse(genomicAssembly().name()) - .orElseThrow(() -> new IllegalArgumentException("Unknown genomic assembly '" + genomicAssembly().name()+ '\'')); + @Deprecated(forRemoval = true) + default VariantParser forPath(Path variantResource) { + return forPath(variantResource, GenomeBuild.HG38, TranscriptDatabase.REFSEQ).orElse(null); } - VariantParser forPath(Path variantResource); } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java index 0d4ec9c81..f5d68c946 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java @@ -6,29 +6,36 @@ /** * Convenience class to represent the age of a proband. 
Note that if (@link #initialized} is false, * then we are representing the fact that we do not know the age we will disregard the feature - * in our calculations. We will represent prenatal age as negative values. + * in our calculations. We will represent prenatal age as number of completed gestational weeks and days, + * and {@link #isGestational()} flag will be set. * @author Peter Robinson */ public class Age { private final boolean isUnknown; + private final boolean isGestational; private final int years; private final int months; + private final int weeks; private final int days; /** Used as a constant if we do not have information about the age of a proband. */ private final static Age NOT_KNOWN = new Age(); - private Age(int years, int months, int days) { + private Age(int years, int months, int weeks, int days) { this.years=years; this.months=months; + this.weeks=weeks; this.days=days; this.isUnknown = false; + this.isGestational = weeks != 0; } private Age() { this.years=0; this.months=0; + this.weeks=0; this.days=0; this.isUnknown = true; + this.isGestational = false; } public static Age ageNotKnown() { @@ -43,6 +50,10 @@ public int getMonths() { return months; } + public int getWeeks() { + return weeks; + } + public int getDays() { return days; } @@ -51,6 +62,14 @@ public boolean isUnknown() { return isUnknown; } + public boolean isGestational() { + return isGestational; + } + + public boolean isPostnatal() { + return !isGestational; + } + public static Age ageInYears(int y) { return of(y,0,0); } @@ -63,13 +82,24 @@ public static Age ageInDays(int d) { return of(0,0,d); } + /** + * @param period representing postnatal (not gestational) age. 
+ * @return age object + */ public static Age parse(Period period) { Period normalized = period.normalized(); return of(normalized.getYears(), normalized.getMonths(), normalized.getDays()); } + public static Age gestationalAge(int weeks, int days) { + return new Age(0, 0, weeks, days); + } + + /** + * Create a postnatal age from given inputs. + */ public static Age of(int years, int months, int days) { - return new Age(years, months, days); + return new Age(years, months, 0, days); } @Override @@ -80,12 +110,13 @@ public boolean equals(Object o) { return isUnknown == age.isUnknown && years == age.years && months == age.months && + weeks == age.weeks && days == age.days; } @Override public int hashCode() { - return Objects.hash(isUnknown, years, months, days); + return Objects.hash(isUnknown, years, months, weeks, days); } @Override @@ -94,6 +125,7 @@ public String toString() { "isUnknown=" + isUnknown + ", years=" + years + ", months=" + months + + ", weeks=" + weeks + ", days=" + days + '}'; } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/TranscriptDatabase.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/TranscriptDatabase.java new file mode 100644 index 000000000..abbac7daf --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/TranscriptDatabase.java @@ -0,0 +1,32 @@ +package org.monarchinitiative.lirical.core.model; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Optional; + +public enum TranscriptDatabase { + UCSC, + REFSEQ; + + private static final Logger LOGGER = LoggerFactory.getLogger(TranscriptDatabase.class); + + @Override + public String toString() { + return switch (this) { + case UCSC -> "ucsc"; + case REFSEQ -> "RefSeq"; + }; + } + + public static Optional parse(String value) { + return switch (value.toLowerCase()) { + case "ucsc" -> Optional.of(UCSC); + case "refseq" -> Optional.of(REFSEQ); + default -> { + 
LOGGER.warn("Unknown transcript database"); + yield Optional.empty(); + } + }; + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java index 8c9025cca..4b3c6fb87 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java @@ -3,7 +3,7 @@ import java.util.Objects; import java.util.Optional; -public class VariantMetadataDefault implements VariantMetadata { +class VariantMetadataDefault implements VariantMetadata { private static final VariantMetadataDefault EMPTY = new VariantMetadataDefault(Float.NaN, Float.NaN, ClinvarClnSig.NOT_PROVIDED); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultWriterFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultWriterFactory.java index 3dc9b54f7..9ba4960be 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultWriterFactory.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultWriterFactory.java @@ -1,5 +1,8 @@ package org.monarchinitiative.lirical.core.output; +import org.monarchinitiative.lirical.core.analysis.AnalysisData; +import org.monarchinitiative.lirical.core.analysis.AnalysisResults; + import java.util.Optional; /** @@ -13,4 +16,16 @@ public interface AnalysisResultWriterFactory { */ Optional getWriter(OutputFormat outputFormat); + /** + * Since deprecation, the method always throws a {@link RuntimeException}. + * + * @deprecated use {@link #getWriter(OutputFormat)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") + default AnalysisResultsWriter getWriter(AnalysisData analysisData, + AnalysisResults analysisResults, + AnalysisResultsMetadata metadata) { + throw new RuntimeException("The method has been deprecated."); + } + } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsWriter.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsWriter.java index 6fbe70fdc..f681cea13 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsWriter.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsWriter.java @@ -21,4 +21,14 @@ void process(AnalysisData analysisData, AnalysisResultsMetadata metadata, OutputOptions outputOptions) throws IOException; + /** + * Since deprecation, the method always throws a {@link RuntimeException}. + * + * @deprecated use {@link #process(AnalysisData, AnalysisResults, AnalysisResultsMetadata, OutputOptions)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") + default void process(OutputOptions outputOptions) { + throw new RuntimeException("The method has been deprecated."); + } + } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/OutputOptions.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/OutputOptions.java index 80e7f0810..c3f49fdab 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/OutputOptions.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/OutputOptions.java @@ -1,11 +1,100 @@ package org.monarchinitiative.lirical.core.output; import java.nio.file.Path; +import java.util.Objects; + +public final class OutputOptions { + private final LrThreshold lrThreshold; + private final MinDiagnosisCount minDiagnosisCount; + private final float pathogenicityThreshold; + private final boolean displayAllVariants; + private final Path outputDirectory; + private final String prefix; + + public OutputOptions(LrThreshold lrThreshold, + MinDiagnosisCount minDiagnosisCount, + float pathogenicityThreshold, + boolean displayAllVariants, + Path outputDirectory, + String prefix) { + this.lrThreshold = lrThreshold; + this.minDiagnosisCount = minDiagnosisCount; + this.pathogenicityThreshold = pathogenicityThreshold; + this.displayAllVariants = displayAllVariants; + this.outputDirectory = outputDirectory; + this.prefix = prefix; + } + + /** + * @deprecated use the {@link #OutputOptions(LrThreshold, MinDiagnosisCount, float, boolean, Path, String)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC2") + public OutputOptions(LrThreshold lrThreshold, + MinDiagnosisCount minDiagnosisCount, + float pathogenicityThreshold, + boolean displayAllVariants, + Path outputDirectory, + String prefix, + Iterable outputFormats) { + this.lrThreshold = lrThreshold; + this.minDiagnosisCount = minDiagnosisCount; + this.pathogenicityThreshold = pathogenicityThreshold; + this.displayAllVariants = displayAllVariants; + this.outputDirectory = outputDirectory; + this.prefix = prefix; + } + + public LrThreshold lrThreshold() { + return lrThreshold; + } + + public MinDiagnosisCount minDiagnosisCount() { + return minDiagnosisCount; + } + + public float pathogenicityThreshold() { + return pathogenicityThreshold; + } + + public boolean displayAllVariants() { + return displayAllVariants; + } + + public Path outputDirectory() { + return outputDirectory; + } + + public String prefix() { + return prefix; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (OutputOptions) obj; + return Objects.equals(this.lrThreshold, that.lrThreshold) && + Objects.equals(this.minDiagnosisCount, that.minDiagnosisCount) && + Float.floatToIntBits(this.pathogenicityThreshold) == Float.floatToIntBits(that.pathogenicityThreshold) && + this.displayAllVariants == that.displayAllVariants && + Objects.equals(this.outputDirectory, that.outputDirectory) && + Objects.equals(this.prefix, that.prefix); + } + + @Override + public int hashCode() { + return Objects.hash(lrThreshold, minDiagnosisCount, pathogenicityThreshold, displayAllVariants, outputDirectory, prefix); + } + + @Override + public String toString() { + return "OutputOptions[" + + "lrThreshold=" + lrThreshold + ", " + + "minDiagnosisCount=" + minDiagnosisCount + ", " + + "pathogenicityThreshold=" + pathogenicityThreshold + ", " + + "displayAllVariants=" + displayAllVariants + ", " + + 
"outputDirectory=" + outputDirectory + ", " + + "prefix=" + prefix + ']'; + } -public record OutputOptions(LrThreshold lrThreshold, - MinDiagnosisCount minDiagnosisCount, - float pathogenicityThreshold, - boolean displayAllVariants, - Path outputDirectory, - String prefix) { } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariantDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariantDefault.java index 0dce25bab..fc724ff91 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariantDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariantDefault.java @@ -1,6 +1,7 @@ package org.monarchinitiative.lirical.core.output; import org.monarchinitiative.lirical.core.model.*; +import org.monarchinitiative.lirical.core.service.VariantMetadataService; import org.monarchinitiative.svart.CoordinateSystem; import org.monarchinitiative.svart.GenomicVariant; import org.monarchinitiative.svart.Strand; @@ -80,7 +81,7 @@ public float getPathogenicityScore() { @Override public float getFrequency() { - return variant.frequency().orElse(0f); + return variant.frequency().orElse(Float.NaN); } @Override diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/BackgroundVariantFrequencyServiceFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/BackgroundVariantFrequencyServiceFactory.java new file mode 100644 index 000000000..41f89e33f --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/BackgroundVariantFrequencyServiceFactory.java @@ -0,0 +1,11 @@ +package org.monarchinitiative.lirical.core.service; + +import org.monarchinitiative.lirical.core.model.GenomeBuild; + +import java.util.Optional; + +public interface BackgroundVariantFrequencyServiceFactory { + + Optional forGenomeBuild(GenomeBuild genomeBuild, double 
defaultVariantBackgroundFrequency); + +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/FunctionalVariantAnnotatorService.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/FunctionalVariantAnnotatorService.java new file mode 100644 index 000000000..0e4acca77 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/FunctionalVariantAnnotatorService.java @@ -0,0 +1,23 @@ +package org.monarchinitiative.lirical.core.service; + +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; + +import java.util.Optional; + +/** + * {@linkplain FunctionalVariantAnnotatorService} knows about {@link FunctionalVariantAnnotator}s for all genome build + * and transcript database combinations that have been configured for a given + * {@link org.monarchinitiative.lirical.core.Lirical} instance. + */ +public interface FunctionalVariantAnnotatorService { + + /** + * Get {@link FunctionalVariantAnnotator} for a combination of {@link GenomeBuild} and {@link TranscriptDatabase}. + *

+ * The returned value is empty if LIRICAL resources do not allow configuring the annotator for given combination. + */ + Optional getFunctionalAnnotator(GenomeBuild genomeBuild, + TranscriptDatabase transcriptDatabase); + +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/NoOpVariantMetadataServiceFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/NoOpVariantMetadataServiceFactory.java new file mode 100644 index 000000000..d24d31049 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/NoOpVariantMetadataServiceFactory.java @@ -0,0 +1,22 @@ +package org.monarchinitiative.lirical.core.service; + +import org.monarchinitiative.lirical.core.model.GenomeBuild; + +import java.util.Optional; + +class NoOpVariantMetadataServiceFactory implements VariantMetadataServiceFactory { + + private static final NoOpVariantMetadataServiceFactory INSTANCE = new NoOpVariantMetadataServiceFactory(); + + static NoOpVariantMetadataServiceFactory getInstance() { + return INSTANCE; + } + + private NoOpVariantMetadataServiceFactory() { + } + + @Override + public Optional getVariantMetadataService(GenomeBuild genomeBuild) { + return Optional.empty(); + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/TranscriptDatabase.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/TranscriptDatabase.java index 7ce7bd0b1..00efe0282 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/TranscriptDatabase.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/TranscriptDatabase.java @@ -1,5 +1,9 @@ package org.monarchinitiative.lirical.core.service; +/** + * @deprecated use {@link org.monarchinitiative.lirical.core.model.TranscriptDatabase} instead. 
+ */ +@Deprecated(since = "2.0.0-RC2", forRemoval = true) public enum TranscriptDatabase { UCSC, REFSEQ; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/VariantMetadataService.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/VariantMetadataService.java index f87eb21cc..e619c2cbb 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/VariantMetadataService.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/VariantMetadataService.java @@ -11,15 +11,16 @@ public interface VariantMetadataService { /** * We will assume a frequency of 1:100,000 if no frequency data is available. */ - float DEFAULT_FREQUENCY = 0.00001F; - + float DEFAULT_FREQUENCY = 1e-5f; + + @Deprecated(forRemoval = true, since = "2.0.0-RC2") static Options defaultOptions() { return new Options(DEFAULT_FREQUENCY); } VariantMetadata metadata(GenomicVariant variant, List effects); - + @Deprecated(forRemoval = true, since = "2.0.0-RC2") record Options(float defaultFrequency) { } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/VariantMetadataServiceFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/VariantMetadataServiceFactory.java new file mode 100644 index 000000000..4a6a51271 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/VariantMetadataServiceFactory.java @@ -0,0 +1,19 @@ +package org.monarchinitiative.lirical.core.service; + +import org.monarchinitiative.lirical.core.model.GenomeBuild; + +import java.util.Optional; + +/** + * {@linkplain VariantMetadataServiceFactory} knows about {@link VariantMetadataService}s for all genome builds + * that have been configured for a given {@link org.monarchinitiative.lirical.core.Lirical} instance. 
+ */ +public interface VariantMetadataServiceFactory { + + /** Returns the factory obtained from {@code NoOpVariantMetadataServiceFactory.getInstance()}, whose lookups always yield an empty {@link Optional}. */ static VariantMetadataServiceFactory noOpFactory() { + return NoOpVariantMetadataServiceFactory.getInstance(); + } + + /** Looks up the {@link VariantMetadataService} for {@code genomeBuild}. NOTE(review): presumably the result is empty when no service is configured for the build - confirm against the implementations. */ Optional getVariantMetadataService(GenomeBuild genomeBuild); + +} diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/AgeTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/AgeTest.java index c21365f21..beb79469a 100644 --- a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/AgeTest.java +++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/AgeTest.java @@ -2,8 +2,7 @@ import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.*; public class AgeTest { @@ -13,22 +12,39 @@ public void testDays() { assertEquals(0, twoDays.getYears()); assertEquals(0,twoDays.getMonths()); assertEquals(2, twoDays.getDays()); + assertTrue(twoDays.isPostnatal()); + assertFalse(twoDays.isGestational()); } @Test public void testMonths() { - Age threeMonths = Age.ageInMonths(3); - assertEquals(0, threeMonths.getDays()); - assertEquals(3, threeMonths.getMonths()); - assertEquals(0, threeMonths.getYears()); + Age age = Age.ageInMonths(3); + assertEquals(0, age.getDays()); + assertEquals(3, age.getMonths()); + assertEquals(0, age.getYears()); + assertTrue(age.isPostnatal()); + assertFalse(age.isGestational()); } @Test public void testYears() { - Age thirteenYears = Age.ageInYears(13); - assertEquals(0, thirteenYears.getDays()); - assertEquals(0, thirteenYears.getMonths()); - assertEquals(13, thirteenYears.getYears()); + Age age = Age.ageInYears(13); + assertEquals(0, age.getDays()); + assertEquals(0, age.getMonths()); + assertEquals(13, age.getYears()); + assertTrue(age.isPostnatal()); + assertFalse(age.isGestational()); + } + + @Test + public void gestational() { + Age age = 
Age.gestationalAge(30, 2); + assertEquals(0, age.getYears()); + assertEquals(0, age.getMonths()); + assertEquals(30, age.getWeeks()); + assertEquals(2, age.getDays()); + assertFalse(age.isPostnatal()); + assertTrue(age.isGestational()); } @Test diff --git a/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java b/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java index 561681a8d..67fbf4ef5 100644 --- a/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java +++ b/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java @@ -7,8 +7,6 @@ import org.monarchinitiative.lirical.core.model.ClinvarClnSig; import org.monarchinitiative.lirical.core.model.VariantMetadata; import org.monarchinitiative.lirical.core.service.VariantMetadataService; -import org.monarchinitiative.lirical.exomiser_db_adapter.serializers.AlleleKeyDataType; -import org.monarchinitiative.lirical.exomiser_db_adapter.serializers.AllelePropertiesDataType; import org.monarchinitiative.lirical.exomiser_db_adapter.model.AlleleProtoAdaptor; import org.monarchinitiative.lirical.exomiser_db_adapter.model.frequency.FrequencyData; import org.monarchinitiative.lirical.exomiser_db_adapter.model.pathogenicity.ClinVarData; @@ -17,17 +15,16 @@ import org.monarchinitiative.svart.CoordinateSystem; import org.monarchinitiative.svart.GenomicVariant; import org.monarchinitiative.svart.Strand; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.nio.file.Path; import java.util.List; -import java.util.Objects; +/** + * @deprecated the class will be removed from the public API in v2.0.0. Use {@link ExomiserMvStoreMetadataServiceFactory} instead. 
+ */ +@Deprecated public class ExomiserMvStoreMetadataService implements VariantMetadataService { - private static final Logger LOGGER = LoggerFactory.getLogger(ExomiserMvStoreMetadataService.class); - // Note: Repeated retrieval of AlleleProperties from MVMap will hopefully not pose a huge performance issue // since MVMap uses caching (16MB, 16 segments) by default. @@ -40,22 +37,19 @@ public class ExomiserMvStoreMetadataService implements VariantMetadataService { * A map with data from the Exomiser database. */ private final MVMap alleleMap; - private final Options options; - public static ExomiserMvStoreMetadataService of(Path mvStore, - Options options) { + public static ExomiserMvStoreMetadataService of(Path mvStore) { MVStore store = new MVStore.Builder() .fileName(mvStore.toAbsolutePath().toString()) .readOnly() .open(); store.setCacheSize(CACHE_SIZE); - return new ExomiserMvStoreMetadataService(store, options); + return new ExomiserMvStoreMetadataService(store); } - private ExomiserMvStoreMetadataService(MVStore mvStore, Options options) { + private ExomiserMvStoreMetadataService(MVStore mvStore) { this.alleleMap = MvStoreUtil.openAlleleMVMap(mvStore); - this.options = options; } @Override @@ -67,7 +61,7 @@ public VariantMetadata metadata(GenomicVariant variant, List effe ClinvarClnSig clinvarClnSig; if (alleleProp == null) { - frequency = options.defaultFrequency(); + frequency = DEFAULT_FREQUENCY; pathogenicity = effects.stream() .map(VariantEffectPathogenicityScore::getPathogenicityScoreOf) .max(Float::compareTo) diff --git a/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataServiceFactory.java b/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataServiceFactory.java new file mode 100644 index 000000000..e6f889a80 --- /dev/null +++ 
b/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataServiceFactory.java @@ -0,0 +1,31 @@ +package org.monarchinitiative.lirical.exomiser_db_adapter; + +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.service.VariantMetadataService; +import org.monarchinitiative.lirical.core.service.VariantMetadataServiceFactory; + +import java.nio.file.Path; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +/** A {@link VariantMetadataServiceFactory} that creates {@link ExomiserMvStoreMetadataService}s from per-genome-build database paths. */ public class ExomiserMvStoreMetadataServiceFactory implements VariantMetadataServiceFactory { + + /* Looked up by genome build in getVariantMetadataService; the map is stored as given, without copying. */ private final Map exomiserDbPaths; + + /** Creates the factory. Throws {@link NullPointerException} if {@code exomiserDbPaths} is {@code null}. */ public ExomiserMvStoreMetadataServiceFactory(Map exomiserDbPaths) { + this.exomiserDbPaths = Objects.requireNonNull(exomiserDbPaths); + } + + /** Static factory equivalent to calling the public constructor with {@code exomiserDbPaths}. */ public static ExomiserMvStoreMetadataServiceFactory of(Map exomiserDbPaths) { + return new ExomiserMvStoreMetadataServiceFactory(exomiserDbPaths); + } + + /** Returns a service created with {@code ExomiserMvStoreMetadataService.of(path)} when a path is mapped to {@code genomeBuild}, otherwise an empty {@link Optional}. Nothing is cached here: every successful call invokes {@code ExomiserMvStoreMetadataService.of} again. */ @Override + public Optional getVariantMetadataService(GenomeBuild genomeBuild) { + Path path = exomiserDbPaths.get(genomeBuild); + return path != null + ? 
Optional.of(ExomiserMvStoreMetadataService.of(path)) + : Optional.empty(); + } +} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/LiricalDataResolver.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/LiricalDataResolver.java index b9fc10c0a..b3863a7ae 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/LiricalDataResolver.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/LiricalDataResolver.java @@ -41,6 +41,10 @@ private void checkV1Resources() throws LiricalDataException { } } + public Path dataDirectory() { + return dataDirectory; + } + /** * @deprecated use {@link #hpoJson()} */ @@ -82,14 +86,18 @@ public Path hg19RefseqTxDatabase() { return dataDirectory.resolve("hg19_refseq.ser"); } - private Path hg38RefseqTxDatabase() { + public Path hg38RefseqTxDatabase() { return dataDirectory.resolve("hg38_refseq.ser"); } - private Path hg38UcscTxDatabase() { + public Path hg38UcscTxDatabase() { return dataDirectory.resolve("hg38_ucsc.ser"); } + /** + * @deprecated use {@link #transcriptCacheFor(GenomeBuild, org.monarchinitiative.lirical.core.model.TranscriptDatabase)} instead + */ + @Deprecated(forRemoval = true) public Path transcriptCacheFor(GenomeBuild genomeBuild, TranscriptDatabase txDb) { return switch (genomeBuild) { case HG19 -> switch (txDb) { @@ -102,4 +110,17 @@ public Path transcriptCacheFor(GenomeBuild genomeBuild, TranscriptDatabase txDb) }; }; } + + public Path transcriptCacheFor(GenomeBuild genomeBuild, org.monarchinitiative.lirical.core.model.TranscriptDatabase txDb) { + return switch (genomeBuild) { + case HG19 -> switch (txDb) { + case UCSC -> hg19UcscTxDatabase(); + case REFSEQ -> hg19RefseqTxDatabase(); + }; + case HG38 -> switch (txDb) { + case UCSC -> hg38UcscTxDatabase(); + case REFSEQ -> hg38RefseqTxDatabase(); + }; + }; + } } diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/GenotypeDataIngestor.java 
b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/background/BackgroundVariantFrequencyParser.java similarity index 82% rename from lirical-io/src/main/java/org/monarchinitiative/lirical/io/GenotypeDataIngestor.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/background/BackgroundVariantFrequencyParser.java index 94f053465..741b4e987 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/GenotypeDataIngestor.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/background/BackgroundVariantFrequencyParser.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.io; +package org.monarchinitiative.lirical.io.background; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; @@ -11,17 +11,16 @@ /** * This class coordinates the input of the background frequency file. Note that this file is added as a resource to the - * JAR file, i.e., {@code LIRICAL.jar!/background/background-hg19.tsv} (or -hg38.tsv), and so it cannot be opened using - * a path. The user is allowed to provide their own background file, in which case a path is used. There are two - * factory methods, one for the path and one for the name of a resource (both are strings). + * JAR file, i.e., {@code /background/background-hg19.tsv}. 
+ * * @author Peter Robinson */ -public class GenotypeDataIngestor { - private static final Logger logger = LoggerFactory.getLogger(GenotypeDataIngestor.class); +public class BackgroundVariantFrequencyParser { + private static final Logger logger = LoggerFactory.getLogger(BackgroundVariantFrequencyParser.class); private final static String ENTREZ_GENE_PREFIX="NCBIGene"; - private GenotypeDataIngestor() { + private BackgroundVariantFrequencyParser() { } /** diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/background/CustomBackgroundVariantFrequencyServiceFactory.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/background/CustomBackgroundVariantFrequencyServiceFactory.java new file mode 100644 index 000000000..39f4cfd2d --- /dev/null +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/background/CustomBackgroundVariantFrequencyServiceFactory.java @@ -0,0 +1,51 @@ +package org.monarchinitiative.lirical.io.background; + +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.service.BackgroundVariantFrequencyService; +import org.monarchinitiative.lirical.core.service.BackgroundVariantFrequencyServiceFactory; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +/** + * A {@link BackgroundVariantFrequencyServiceFactory} implementation that uses user-provided frequency files. 
+ */ +public class CustomBackgroundVariantFrequencyServiceFactory implements BackgroundVariantFrequencyServiceFactory { + + private static final Logger LOGGER = LoggerFactory.getLogger(CustomBackgroundVariantFrequencyServiceFactory.class); + + /* Looked up by genome build in forGenomeBuild; a missing entry means no custom file for that build. */ private final Map backgroundFilePaths; + + /** Creates a factory from {@code backgroundFilePaths}, the per-genome-build paths of the background frequency files. Throws {@link NullPointerException} if the map is {@code null}. */ public static CustomBackgroundVariantFrequencyServiceFactory of(Map backgroundFilePaths) { + return new CustomBackgroundVariantFrequencyServiceFactory(backgroundFilePaths); + } + + private CustomBackgroundVariantFrequencyServiceFactory(Map backgroundFilePaths) { + this.backgroundFilePaths = Objects.requireNonNull(backgroundFilePaths); + } + + /** Parses the background file mapped to {@code genomeBuild} and wraps the parsed frequencies, together with {@code defaultVariantBackgroundFrequency}, into a service. Returns an empty {@link Optional} when no file is mapped to the build, or when reading the file fails with an {@link IOException} (logged as a warning, including the cause). */ @Override + public Optional forGenomeBuild(GenomeBuild genomeBuild, double defaultVariantBackgroundFrequency) { + Path backgroundFile = backgroundFilePaths.get(genomeBuild); + if (backgroundFile == null) { + return Optional.empty(); + } else { + try (BufferedReader reader = Files.newBufferedReader(backgroundFile)) { + Map frequencyMap = BackgroundVariantFrequencyParser.parse(reader); + return Optional.of(BackgroundVariantFrequencyService.of(frequencyMap, defaultVariantBackgroundFrequency)); + } catch (IOException e) { + /* Pass the exception as the trailing argument so the cause of the failure is logged, not just the path. */ LOGGER.warn("Unable to read background frequency file at {}", backgroundFile.toAbsolutePath(), e); + return Optional.empty(); + } + } + } + +} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarFunctionalVariantAnnotatorService.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarFunctionalVariantAnnotatorService.java new file mode 100644 index 000000000..1a286758f --- /dev/null +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarFunctionalVariantAnnotatorService.java @@ -0,0 +1,109 @@ +package org.monarchinitiative.lirical.io.service; + +import de.charite.compbio.jannovar.data.JannovarData; +import de.charite.compbio.jannovar.data.JannovarDataSerializer; +import de.charite.compbio.jannovar.data.SerializationException; +import 
org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; +import org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotator; +import org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotatorService; +import org.monarchinitiative.lirical.io.LiricalDataException; +import org.monarchinitiative.lirical.io.LiricalDataResolver; +import org.monarchinitiative.phenol.annotations.formats.GeneIdentifiers; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.stream.Collectors; + +/** A {@link FunctionalVariantAnnotatorService} backed by Jannovar transcript databases located through a {@link LiricalDataResolver}. Only the most recently requested annotator is kept; asking for a different genome build or transcript database loads a new one and replaces it. */ public class JannovarFunctionalVariantAnnotatorService implements FunctionalVariantAnnotatorService { + + private static final Logger LOGGER = LoggerFactory.getLogger(JannovarFunctionalVariantAnnotatorService.class); + + private final LiricalDataResolver liricalDataResolver; + private final GeneIdentifiers geneIdentifiers; + /* Transcript databases whose files were readable when the service was created, per genome build (see initializeAnnotators). */ private final Map> knownAnnotators; + + /* The next three fields describe the currently loaded annotator; they are read and written only inside synchronized (this). */ private GenomeBuild genomeBuild; + private TranscriptDatabase transcriptDatabase; + private FunctionalVariantAnnotator functionalVariantAnnotator; + + /** Creates the service; the readable transcript databases are detected once, here, via {@code initializeAnnotators}. */ public static JannovarFunctionalVariantAnnotatorService of(LiricalDataResolver liricalDataResolver, + GeneIdentifiers geneIdentifiers) { + Map> knownAnnotators = initializeAnnotators(liricalDataResolver); + return new JannovarFunctionalVariantAnnotatorService(liricalDataResolver, geneIdentifiers, knownAnnotators); + } + + private JannovarFunctionalVariantAnnotatorService(LiricalDataResolver liricalDataResolver, + GeneIdentifiers geneIdentifiers, + Map> knownAnnotators) { + this.liricalDataResolver = Objects.requireNonNull(liricalDataResolver); + this.geneIdentifiers = Objects.requireNonNull(geneIdentifiers); + this.knownAnnotators = knownAnnotators; + } + + /** Returns the annotator for the combination, loading the transcript database if the combination differs from the one currently loaded. Returns an empty {@link Optional} when the combination was not detected at creation time, or when loading fails (logged as a warning, including the cause); a failed load leaves the previously loaded annotator in place. */ @Override + public Optional getFunctionalAnnotator(GenomeBuild genomeBuild, + TranscriptDatabase transcriptDatabase) { + if 
(knownAnnotators.getOrDefault(genomeBuild, Set.of()).contains(transcriptDatabase)) { + synchronized (this) { + if (this.genomeBuild != genomeBuild || this.transcriptDatabase != transcriptDatabase) { + LOGGER.debug("Loading transcript database for {}:{}", genomeBuild, transcriptDatabase); + Path txDatabasePath = liricalDataResolver.transcriptCacheFor(genomeBuild, transcriptDatabase); + try { + functionalVariantAnnotator = loadFunctionalVariantAnnotator(txDatabasePath, geneIdentifiers); + } catch (LiricalDataException e) { + /* Pass the exception as the trailing argument so the cause of the failure is logged, not just the path. */ LOGGER.warn("Unable to load transcript database from {}", txDatabasePath.toAbsolutePath(), e); + return Optional.empty(); + } + /* Record the combination only after a successful load. */ this.genomeBuild = genomeBuild; + this.transcriptDatabase = transcriptDatabase; + } + return Optional.of(functionalVariantAnnotator); + } + } else { + return Optional.empty(); + } + } + + /** Checks with {@code Files.isReadable} which of the four transcript database files (hg19/hg38 x RefSeq/UCSC) are available and returns them grouped by genome build; logs a warning if none is readable. */ private static Map> initializeAnnotators(LiricalDataResolver liricalDataResolver) { + Map> annotators = new HashMap<>(); + if (Files.isReadable(liricalDataResolver.hg19RefseqTxDatabase())) + annotators.computeIfAbsent(GenomeBuild.HG19, gb -> new HashSet<>()).add(TranscriptDatabase.REFSEQ); + if (Files.isReadable(liricalDataResolver.hg19UcscTxDatabase())) + annotators.computeIfAbsent(GenomeBuild.HG19, gb -> new HashSet<>()).add(TranscriptDatabase.UCSC); + + if (Files.isReadable(liricalDataResolver.hg38RefseqTxDatabase())) + annotators.computeIfAbsent(GenomeBuild.HG38, gb -> new HashSet<>()).add(TranscriptDatabase.REFSEQ); + if (Files.isReadable(liricalDataResolver.hg38UcscTxDatabase())) + annotators.computeIfAbsent(GenomeBuild.HG38, gb -> new HashSet<>()).add(TranscriptDatabase.UCSC); + + String configured = annotators.entrySet().stream() + .flatMap(e -> e.getValue().stream() + .map(v -> "%s -> %s".formatted(e.getKey(), v.toString()))) + .collect(Collectors.joining(", ")); + if (annotators.isEmpty()) + LOGGER.warn("No functional annotators were configured"); + else + LOGGER.debug("Configured Jannovar functional annotators for {}", 
configured); + + return annotators; + } + + /** Loads the Jannovar data at {@code txDatabasePath} and wraps it, with {@code geneIdentifiers}, into an annotator. */ private FunctionalVariantAnnotator loadFunctionalVariantAnnotator(Path txDatabasePath, + GeneIdentifiers geneIdentifiers) throws LiricalDataException { + JannovarData jannovarData = loadJannovarData(txDatabasePath); + return JannovarFunctionalVariantAnnotator.of(jannovarData, geneIdentifiers); + } + + /** Deserializes the Jannovar data at {@code txDatabasePath}; a {@code SerializationException} is rethrown wrapped in a {@code LiricalDataException}. */ private static JannovarData loadJannovarData(Path txDatabasePath) throws LiricalDataException { + LOGGER.info("Loading transcript database from {}", txDatabasePath.toAbsolutePath()); + try { + return new JannovarDataSerializer(txDatabasePath.toAbsolutePath().toString()).load(); + } catch (SerializationException e) { + throw new LiricalDataException(e); + } + } +} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/VcfVariantParserFactory.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/VcfVariantParserFactory.java index 408e9c723..864ed13f5 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/VcfVariantParserFactory.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/VcfVariantParserFactory.java @@ -1,41 +1,60 @@ package org.monarchinitiative.lirical.io.vcf; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; import org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotator; +import org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotatorService; import org.monarchinitiative.lirical.core.service.VariantMetadataService; import org.monarchinitiative.lirical.core.io.VariantParser; import org.monarchinitiative.lirical.core.io.VariantParserFactory; +import org.monarchinitiative.lirical.core.service.VariantMetadataServiceFactory; +import org.monarchinitiative.svart.assembly.GenomicAssemblies; import org.monarchinitiative.svart.assembly.GenomicAssembly; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.nio.file.Path; import 
java.util.Objects; +import java.util.Optional; public class VcfVariantParserFactory implements VariantParserFactory { - private final GenomicAssembly genomicAssembly; - private final FunctionalVariantAnnotator variantAnnotator; - private final VariantMetadataService metadataService; + private static final Logger LOGGER = LoggerFactory.getLogger(VcfVariantParserFactory.class); - public static VcfVariantParserFactory of(GenomicAssembly genomicAssembly, - FunctionalVariantAnnotator variantAnnotator, - VariantMetadataService metadataService) { - return new VcfVariantParserFactory(genomicAssembly, variantAnnotator, metadataService); + private final FunctionalVariantAnnotatorService variantAnnotatorService; + private final VariantMetadataServiceFactory metadataServiceFactory; + + public static VcfVariantParserFactory of(FunctionalVariantAnnotatorService variantAnnotatorService, + VariantMetadataServiceFactory metadataServiceFactory) { + return new VcfVariantParserFactory(variantAnnotatorService, metadataServiceFactory); } - private VcfVariantParserFactory(GenomicAssembly genomicAssembly, - FunctionalVariantAnnotator variantAnnotator, - VariantMetadataService metadataService) { - this.genomicAssembly = Objects.requireNonNull(genomicAssembly); - this.variantAnnotator = Objects.requireNonNull(variantAnnotator, "Variant annotator must not be null!"); - this.metadataService = Objects.requireNonNull(metadataService); + private VcfVariantParserFactory(FunctionalVariantAnnotatorService variantAnnotatorService, + VariantMetadataServiceFactory metadataServiceFactory) { + this.variantAnnotatorService = Objects.requireNonNull(variantAnnotatorService, "Variant annotator must not be null!"); + this.metadataServiceFactory = Objects.requireNonNull(metadataServiceFactory, "Metadata service factory must not be null!"); } @Override - public GenomicAssembly genomicAssembly() { - return genomicAssembly; + public Optional forPath(Path path, GenomeBuild genomeBuild, TranscriptDatabase 
transcriptDatabase) { + GenomicAssembly genomicAssembly = parseSvartGenomicAssembly(genomeBuild); + Optional annotator = variantAnnotatorService.getFunctionalAnnotator(genomeBuild, transcriptDatabase); + Optional metadataService = metadataServiceFactory.getVariantMetadataService(genomeBuild); + + if (annotator.isEmpty()) + LOGGER.warn("Cannot configure functional variant annotator for {} {}", genomeBuild, transcriptDatabase); + if (metadataService.isEmpty()) + LOGGER.warn("Cannot configure variant metadata service for {}", genomeBuild); + + return annotator.isPresent() && metadataService.isPresent() + ? Optional.of(new VcfVariantParser(path, genomicAssembly, genomeBuild, annotator.get(), metadataService.get())) + : Optional.empty(); } - @Override - public VariantParser forPath(Path path) { - return new VcfVariantParser(path, genomicAssembly, genomeBuild(), variantAnnotator, metadataService); + static GenomicAssembly parseSvartGenomicAssembly(GenomeBuild genomeAssembly) { + return switch (genomeAssembly) { + case HG19 -> GenomicAssemblies.GRCh37p13(); + case HG38 -> GenomicAssemblies.GRCh38p13(); + }; } } diff --git a/pom.xml b/pom.xml index b55fa5119..be8bb9088 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ 17 ${java.version} ${java.version} - 2.0.0-RC4 + 2.0.0-RC5 2.0.0-RC1 0.35 @@ -276,6 +276,11 @@ ${java.version} + + org.apache.maven.plugins + maven-resources-plugin + 3.3.0 + org.apache.maven.plugins maven-surefire-plugin