From 4bd2c049afa86a5cefac4359c16465ddcbca2a97 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 5 Oct 2022 12:53:58 -0400 Subject: [PATCH 01/20] Simplify CLI commands. Signed-off-by: Daniel Danis --- .../phenopackets/phenopackettools/Main.java | 49 ++++++++++++++++--- .../phenopackettools/PhenopacketTools.java | 23 --------- .../command/ConvertCommand.java | 3 +- .../command/ExamplesCommand.java | 2 +- 4 files changed, 45 insertions(+), 32 deletions(-) delete mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/PhenopacketTools.java diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java index c22edc28..aeecb27b 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java @@ -1,20 +1,55 @@ package org.phenopackets.phenopackettools; +import org.phenopackets.phenopackettools.command.BasicValidateCommand; +import org.phenopackets.phenopackettools.command.ConvertCommand; +import org.phenopackets.phenopackettools.command.ExamplesCommand; +import picocli.AutoComplete; import picocli.CommandLine; +import java.util.Locale; + +import static picocli.CommandLine.Help.Ansi.Style.*; + +@CommandLine.Command(name = "phenopacket-tools", + header = Main.HEADER, + version = Main.VERSION, + mixinStandardHelpOptions = true, + subcommands = { + // see https://picocli.info/autocomplete.html + AutoComplete.GenerateCompletion.class, + BasicValidateCommand.class, + ConvertCommand.class, + ExamplesCommand.class, + }, + usageHelpWidth = Main.USAGE_WIDTH, + footer = Main.FOOTER) public class Main { + public static final String HEADER = "\nAn application for creating, converting and validating GA4GH phenopackets\n"; + public static final String VERSION = "phenopacket-tools v0.4.6-SNAPSHOT"; + // Maximum number of characters in 
line of the usage message. public static final int USAGE_WIDTH = 120; + private static final CommandLine.Help.ColorScheme COLOR_SCHEME = new CommandLine.Help.ColorScheme.Builder() + .commands(bold, fg_blue, underline) + .options(fg_yellow) + .parameters(fg_yellow) + .optionParams(italic) + .build(); + + public static final String FOOTER = "\nSee the full documentation at https://phenopacket-tools.readthedocs.io\n"; + + private Main() { + // private no-op + } + public static void main(String[] args) { - if (args.length == 0) { - // if the user doesn't pass any command or option, add -h to show help - args = new String[]{"-h"}; - } - CommandLine cline = new CommandLine(new PhenopacketTools()); + Locale.setDefault(Locale.US); + //noinspection InstantiationOfUtilityClass + CommandLine cline = new CommandLine(new Main()) + .setColorScheme(COLOR_SCHEME); cline.getSubcommands().get("generate-completion").getCommandSpec().usageMessage().hidden(true); - int exitCode = cline.execute(args); - System.exit(exitCode); + System.exit(cline.execute(args)); } } \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/PhenopacketTools.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/PhenopacketTools.java deleted file mode 100644 index d080d196..00000000 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/PhenopacketTools.java +++ /dev/null @@ -1,23 +0,0 @@ -package org.phenopackets.phenopackettools; - -import org.phenopackets.phenopackettools.command.ConvertCommand; -import org.phenopackets.phenopackettools.command.ExamplesCommand; -import org.phenopackets.phenopackettools.command.BasicValidateCommand; -import picocli.AutoComplete; - -import static picocli.CommandLine.*; - -@Command(name = "phenopacket-tools", - version = "0.4.6-SNAPSHOT", - mixinStandardHelpOptions = true, - usageHelpWidth = Main.USAGE_WIDTH, - subcommands = { - // see https://picocli.info/autocomplete.html - 
AutoComplete.GenerateCompletion.class, - ExamplesCommand.class, - ConvertCommand.class, - BasicValidateCommand.class - } -) -public class PhenopacketTools { -} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index aa4c4f79..4d9dd9b6 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -18,7 +18,8 @@ @Command(name = "convert", mixinStandardHelpOptions = true, - description = "Convert a v1.0 phenopacket to a v2.0 phenopacket. Beware this process could be lossy!") + description = "Convert a v1.0 phenopacket to a v2.0 phenopacket.", + footer = "Beware this process could be lossy!") public class ConvertCommand implements Callable { @Parameters(index = "0", arity = "1", description = "Input phenopacket file") diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java index f63c5318..1fa56f26 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java @@ -24,7 +24,7 @@ @Command(name = "examples", mixinStandardHelpOptions = true, - description = "Write example phenopackets to directory.") + description = "Write example phenopackets to a directory.") public class ExamplesCommand implements Callable { @CommandLine.Option(names = {"-o", "--output"}, From 2b7751ca8889ca96355c7b82ceafc671125335f6 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 5 Oct 2022 13:08:42 -0400 Subject: [PATCH 02/20] Draft CLI documentation. 
Signed-off-by: Daniel Danis --- docs/cli.rst | 82 +++++++++++++++++++++++++++++++++++++++++++++++ docs/creating.rst | 31 ++++++++++-------- docs/index.rst | 21 ++++++++---- 3 files changed, 114 insertions(+), 20 deletions(-) create mode 100644 docs/cli.rst diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 00000000..eb16f55c --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,82 @@ +.. _rstcli: + +============================ +Command line interface (CLI) +============================ + +*phenopacket-tools* CLI provides functionality for viewing, conversion and validation +of the top-level elements of Phenopacket schema . This document describes how to set up the CLI application +on Linux, Mac and Windows environments. + +.. note:: + *phenopacket-tools* is written in Java 17 and requires Java 17 or newer to run. + +*phenopacket-tools* is distributed as a standalone executable Java Archive (JAR) file. Provided that Java 17 or better +is available in the environment, the application requires no special installation procedure. + +Setup +~~~~~ + +Most users should download the precompiled JAR file from *phenopacket-tools* release page. +However, it is also possible to build the JAR from sources. + +Download +^^^^^^^^ + +*phenopacket-tools* JAR is provided as part of *phenopacket-tools*' release schedule +from `Releases `_. + +Build from source code +^^^^^^^^^^^^^^^^^^^^^^ + +The source code is available in our `GitHub repository `_. +There are 2 requirements for building the app from sources: + +* **Java Development Kit** (JDK) 17 or newer must be present in the environment and ``$JAVA_HOME`` variable must point + to JDK's location. See `Installing Apache Maven `_ for more details regarding + setting up JDK and ``$JAVA_HOME`` on your system. +* *phenopacket-tools* leverages several open-source Java libraries and a **working internet connection** + is required to download the libraries. 
+ +Run the following commands to check out the stable source code and to build the application:: + + $ git clone https://github.com/phenopackets/phenopacket-tools + $ cd phenopacket-tools + $ ./mvnw -Prelease package + +After a successful build, a file ``phenopacket-tools-cli-${project.version}.jar`` will be created in +the ``phenopacket-tools-cli/target`` directory. Use the JAR file in the same way as the JAR downloaded +from *phenopacket-tools* releases. + +.. note:: + Replace ``${project.version}`` with a given version (e.g. ``0.4.6``). + + +Commands +~~~~~~~~ + +This section describes the commands of *phenopacket-tools* CLI. + +``validate`` - validate semantic and syntactic correctness +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``validate`` command checks *syntactic* and *semantic* correctness of a *phenopacket*, *family*, or *cohort*. +Briefly, phenopacket is syntactically correct if it is well formatted (valid Protobuf message, JSON document, etc.), +meets the requirements of the *Phenopacket schema* (e.g. the REQUIRED attributes such as ``phenopacket.id`` and +``phenopacket.meta_data``, are set), and ``MetaData`` includes a ``Resource`` for all ontology concepts. + + + +.. + TODO - check the validation description. + +``convert`` - convert from v1 to v2 format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +TODO - write the section + +Set up autocompletion +~~~~~~~~~~~~~~~~~~~~~ + +TODO - write the section + diff --git a/docs/creating.rst b/docs/creating.rst index 6807592a..599d90e7 100644 --- a/docs/creating.rst +++ b/docs/creating.rst @@ -5,23 +5,24 @@ Creating Phenopackets ===================== -Google's `Protocol Buffer (protobuf) `_ framework automatically generates -Java code for building and working with Phenopackets. However, the code can be unwieldly. 
Additionally, many users -of the phenopacket framework will want to use a recommeded set of ontology terms for specific kinds of data, and thus -the phenopacket-tools library provides constants that are more convenient to use than manually creating the equilavent message. +Google's `Protocol Buffer (protobuf)`_ framework automatically generates +Java code for building and working with Phenopackets. However, the code can be unwieldy. Additionally, many users +of the phenopacket framework will want to use a recommended set of ontology terms for specific kinds of data, and thus +the *phenopacket-tools* library provides terms and constants that are more convenient to use than manually creating +the equivalent message. -Phenopacket-tools builder pattern -================================= +`phenopacket-tools` builder pattern +=================================== -In this example, let's imagine we want to create a PhenotypicFeature element to denote that severe weakness of the +In this example, let's imagine we want to create a ``PhenotypicFeature`` element to denote that severe weakness of the left triceps muscle was observed in a patient at the age of 11 years and 4 months. First, let us code this using the Phenopacket code that is automatically generated by the protobuf framework. .. code-block:: java - OntologyClass tricepsWeakenss = OntologyClass.newBuilder() + OntologyClass tricepsWeakness = OntologyClass.newBuilder() .setId("HP:0031108") .setLabel("Triceps weakness") .build(); @@ -37,18 +38,19 @@ the Phenopacket code that is automatically generated by the protobuf framework. 
TimeElement ageElement = TimeElement.newBuilder().setAge(iso8601duration) .setAge(iso8601duration) .build(); - PhenotypicFeature pfeature = PhenotypicFeature.newBuilder() - .setType(tricepsWeakenss) + PhenotypicFeature phenotypicFeature = PhenotypicFeature.newBuilder() + .setType(tricepsWeakness) .setOnset(ageElement) .setSeverity(severe) .addModifiers(left) .build(); -The following code block uses functions from the phenopacket-tools library to simplify the creation of this PhenotypicFeature element. +The following code block uses functions from the *phenopacket-tools* library to simplify the creation +of this ``PhenotypicFeature`` element. .. code-block:: java - PhenotypicFeature pfeature2 = PhenotypicFeatureBuilder.builder("HP:0031108","Triceps weakness") + PhenotypicFeature phenotypicFeature2 = PhenotypicFeatureBuilder.builder("HP:0031108", "Triceps weakness") .addModifier(severe()) .addModifier(left()) .onset(TimeElements.age("P11Y4M")) @@ -78,4 +80,7 @@ Both code snippets generate identical phenopacket code. } } -Several detailed examples are available in the phenopackets-tools-cli module in the ``examples`` package. \ No newline at end of file +Several detailed examples are available in the ``phenopacket-tools-cli`` module in the `examples`_ package. + +.. _examples: https://github.com/phenopackets/phenopacket-tools/tree/main/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/examples +.. _Protocol Buffer (protobuf): https://developers.google.com/protocol-buffers diff --git a/docs/index.rst b/docs/index.rst index 9c5c6c59..90e1a288 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,24 +20,31 @@ genetic information, diagnoses, and treatments. The Phenopacket schema supports Phenopackets are designed to be both human and machine-interpretable, enabling computing operations and validation on the basis of defined relationships between diagnoses, lab measurements, and genotypic information. 
-The phenopacket-tools library was written as a modular Java 17 library and has three main goals. +The *phenopacket-tools* library was written as a modular Java 17 library and has three main goals. -- To provide a simplified interface for creating GA4GH phenopackets with Java code -- To provide an extensible validation framework that can be used to check phenopackets for syntactical and semantic correctness. -- To enable developers to extend the validation framework to encode the specific requirements of consortia or projects using either JSON schema or programmatic tools. - -Additionally, phenopacket-tools provides code to convert version 1 to the version 2 (the current version) of the Schema. +- To simplify creating phenopackets with Java code using streamlined builders and predefined building blocks such + as units, anatomical organs, and clinical modifiers. +- To provide a validation framework for checking phenopackets for syntactical and semantic correctness + and to enable developers to extend the validation framework to encode the specific requirements of consortia + or projects using either JSON schema or programmatic tools. +- To convert/lift phenopackets from the obsoleted version 1 to the version 2 (current) of the Schema. +On top of the library, we provide a standalone command-line interface (CLI) application for +conversion and validation. The following sections first describe the library and the last section instructs +how to use the CLI application on your system. +.. + TODO - review the three points and sync them with the manuscript. .. toctree:: :maxdepth: 2 :caption: Contents: creating - converting validation + converting constants + cli .. 
image:: https://onlinelibrary.wiley.com/cms/asset/1cc0a141-da65-45a3-b7b0-6316b7b02069/ggn2202200016-fig-0002-m.jpg From 57a5a293c1d067eeadd833dff3c5269946993d00 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 5 Oct 2022 20:45:11 -0400 Subject: [PATCH 03/20] Improve format sniffing and extract the code into `util` module. Signed-off-by: Daniel Danis --- .../src/main/java/module-info.java | 1 + phenopacket-tools-util/pom.xml | 14 + .../src/main/java/module-info.java | 3 + .../util/format/FormatSniffException.java | 24 ++ .../util/format/FormatSniffer.java | 68 ++++ .../util/format/PhenopacketElement.java | 12 + .../util/format/PhenopacketFormat.java | 32 ++ .../phenopackettools/util/format/Util.java | 34 ++ .../util/format/package-info.java | 4 + .../util/format/FormatSnifferTest.java | 40 +++ .../util/format/comprehensive-cohort.pb | 80 +++++ .../util/format/comprehensive-family.pb | 83 +++++ .../util/format/comprehensive-phenopacket.pb | 60 ++++ .../phenopackettools/util/format/covid.json | 317 ++++++++++++++++++ .../phenopackettools/util/format/covid.yml | 214 ++++++++++++ .../pom.xml | 5 + .../src/main/java/module-info.java | 1 + .../JsonSchemaValidationWorkflowRunner.java | 16 +- .../validator/jsonschema/Util.java | 18 - pom.xml | 1 + 20 files changed, 1004 insertions(+), 23 deletions(-) create mode 100644 phenopacket-tools-util/pom.xml create mode 100644 phenopacket-tools-util/src/main/java/module-info.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java create mode 100644 
phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java create mode 100644 phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java create mode 100644 phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java create mode 100644 phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-cohort.pb create mode 100644 phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-family.pb create mode 100644 phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-phenopacket.pb create mode 100644 phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.json create mode 100644 phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.yml delete mode 100644 phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/Util.java diff --git a/phenopacket-tools-cli/src/main/java/module-info.java b/phenopacket-tools-cli/src/main/java/module-info.java index 97bc502a..1f4d50b6 100644 --- a/phenopacket-tools-cli/src/main/java/module-info.java +++ b/phenopacket-tools-cli/src/main/java/module-info.java @@ -1,4 +1,5 @@ module org.phenopackets.phenopackettools.cli { + requires org.phenopackets.phenopackettools.util; requires org.phenopackets.phenopackettools.converter; requires org.phenopackets.phenopackettools.builder; requires org.phenopackets.phenopackettools.validator.jsonschema; diff --git a/phenopacket-tools-util/pom.xml b/phenopacket-tools-util/pom.xml new file mode 100644 index 00000000..42e30e91 --- /dev/null +++ b/phenopacket-tools-util/pom.xml @@ -0,0 +1,14 @@ + + + + phenopacket-tools + org.phenopackets.phenopackettools + 0.4.6-SNAPSHOT + + 4.0.0 + + phenopacket-tools-util + + \ No newline at end of 
file diff --git a/phenopacket-tools-util/src/main/java/module-info.java b/phenopacket-tools-util/src/main/java/module-info.java new file mode 100644 index 00000000..9d20effb --- /dev/null +++ b/phenopacket-tools-util/src/main/java/module-info.java @@ -0,0 +1,3 @@ +module org.phenopackets.phenopackettools.util { + exports org.phenopackets.phenopackettools.util.format; +} \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java new file mode 100644 index 00000000..3cc3a382 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffException.java @@ -0,0 +1,24 @@ +package org.phenopackets.phenopackettools.util.format; + +/** + * An exception thrown when sniffing of the top-level element of Phenopacket schema cannot be performed. + */ +public class FormatSniffException extends Exception { + + public FormatSniffException() { + super(); + } + + public FormatSniffException(String message) { + super(message); + } + + public FormatSniffException(String message, Throwable cause) { + super(message, cause); + } + + public FormatSniffException(Throwable cause) { + super(cause); + } + +} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java new file mode 100644 index 00000000..0d4984d6 --- /dev/null +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/FormatSniffer.java @@ -0,0 +1,68 @@ +package org.phenopackets.phenopackettools.util.format; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Make an educated guess of the format of a top-level element of Phenopacket schema. 
+ */
+public class FormatSniffer {
+
+    /**
+     * The number of bytes used for format sniffing.
+     */
+    static final int BUFFER_SIZE = 32;
+
+    private FormatSniffer() {
+    }
+
+    /**
+     * Make an educated guess of {@link PhenopacketFormat} based on given {@code payload}.
+     *
+     * @param payload buffer with at least the first {@link #BUFFER_SIZE} bytes of the input.
+     * @return the sniffed {@link PhenopacketFormat}.
+     * @throws FormatSniffException if {@code payload} contains less than {@link #BUFFER_SIZE} bytes.
+     */
+    public static PhenopacketFormat sniff(byte[] payload) throws FormatSniffException {
+        if (payload.length < BUFFER_SIZE)
+            throw new FormatSniffException("Need at least %d bytes to sniff but got %d".formatted(BUFFER_SIZE, payload.length));
+        if (Util.looksLikeJson(payload)) {
+            return PhenopacketFormat.JSON;
+        } else if (Util.looksLikeYaml(payload)) {
+            return PhenopacketFormat.YAML;
+        } else {
+            // No JSON, no YAML, it is likely protobuf bytes or some other bytes.
+            // Trying to interpret the bytes as a protobuf message downstream is the best we can do.
+            // TODO - implement the best possible guessing based on
+            //  https://developers.google.com/protocol-buffers/docs/encoding
+            return PhenopacketFormat.PROTOBUF;
+        }
+    }
+
+    /**
+     * Make an educated guess of {@link PhenopacketFormat} present in given {@code input}. The stream is rewound to the marked position after the bytes are read.
+     *
+     * @param input an {@link InputStream} that supports {@link InputStream#mark(int)}.
+     * @return the sniffed {@link PhenopacketFormat}.
+     * @throws IOException in case an error occurs while reading the {@code input}.
+     * @throws FormatSniffException if there are not enough bytes available in the {@code input} or if the {@code input} does not
+     * support {@link InputStream#mark(int)}.
+     */
+    public static PhenopacketFormat sniff(InputStream input) throws IOException, FormatSniffException {
+        if (input.markSupported()) {
+            byte[] buffer = new byte[BUFFER_SIZE];
+            input.mark(BUFFER_SIZE);
+            int read = input.readNBytes(buffer, 0, BUFFER_SIZE); // a single read() may return fewer bytes than are available
+            input.reset(); // rewind before validating so the stream is back at the mark on the failure path too
+            if (read < BUFFER_SIZE) {
+                // We explode because there are not enough bytes available for format sniffing.
+                String message = read == 0
+                        ? "The stream must not be at the end"
+                        : "Need at least %d bytes to sniff the format but only %d was available".formatted(BUFFER_SIZE, read);
+                throw new FormatSniffException(message);
+            }
+            return sniff(buffer);
+        } else
+            throw new FormatSniffException("The provided InputStream does not support `mark()`");
+    }
+}
diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java
new file mode 100644
index 00000000..46e66f82
--- /dev/null
+++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java
@@ -0,0 +1,12 @@
+package org.phenopackets.phenopackettools.util.format;
+
+/**
+ * The enum members represent the top-level elements of the Phenopacket schema.
+ */
+public enum PhenopacketElement {
+
+    PHENOPACKET,
+    FAMILY,
+    COHORT
+
+}
diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java
new file mode 100644
index 00000000..4227f81a
--- /dev/null
+++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java
@@ -0,0 +1,32 @@
+package org.phenopackets.phenopackettools.util.format;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+/**
+ * The enum members represent the supported representations of the top-level elements of the Phenopacket schema.
+ */
+public enum PhenopacketFormat {
+    PROTOBUF,
+    JSON,
+    YAML;
+
+    /** Parse a format name, ignoring case; an {@link IllegalArgumentException} listing the accepted names is thrown for any other value. */
+    public static PhenopacketFormat parse(String value) {
+        switch (value.toLowerCase()) {
+            case "protobuf":
+                return PROTOBUF;
+            case "json":
+                return JSON;
+            case "yaml":
+                return YAML;
+            default:
+                String expected = Arrays.stream(PhenopacketFormat.values()) // no String.join wrapper: with one argument it returns ""
+                        .map(Enum::name)
+                        .map(String::toLowerCase)
+                        .collect(Collectors.joining(", ", "{", "}"));
+                throw new IllegalArgumentException("Expected one of %s but got %s".formatted(expected, value));
+        }
+    }
+
+}
diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java
new file mode 100644
index 00000000..603769b1
--- /dev/null
+++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/Util.java
@@ -0,0 +1,34 @@
+package org.phenopackets.phenopackettools.util.format;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.regex.Pattern;
+
+class Util {
+
+    private static final Pattern YAML_HEADER = Pattern.compile("((phenopacket)|(family)|(cohort)):");
+
+    private Util() {
+        // static utility class
+    }
+
+    static boolean looksLikeJson(byte[] payload) {
+        String head = new String(payload, 0, Math.min(payload.length, FormatSniffer.BUFFER_SIZE), StandardCharsets.UTF_8);
+        return looksLikeJson(head);
+    }
+
+    static boolean looksLikeJson(String head) {
+        return head.stripLeading().startsWith("{"); // whitespace may precede the opening brace of a JSON object
+    }
+
+    static boolean looksLikeYaml(byte[] payload) {
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(payload), StandardCharsets.UTF_8))) {
+            String line = reader.readLine();
+            // `readLine()` returns null for an empty payload; a missing first line cannot be a YAML header.
+            return line != null && YAML_HEADER.matcher(line).matches();
+        } catch (IOException e) {
+            return false;
+        }
+    }
+
+}
diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java
new file mode 100644
index 00000000..fecd687b
--- /dev/null
+++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Defines the supported phenopacket formats and utility methods for working with the formats.
+ */ +package org.phenopackets.phenopackettools.util.format; \ No newline at end of file diff --git a/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java new file mode 100644 index 00000000..5a1200fb --- /dev/null +++ b/phenopacket-tools-util/src/test/java/org/phenopackets/phenopackettools/util/format/FormatSnifferTest.java @@ -0,0 +1,40 @@ +package org.phenopackets.phenopackettools.util.format; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class FormatSnifferTest { + + private static final Path BASE_DIR = Path.of("src/test/resources/org/phenopackets/phenopackettools/util/format"); + + @ParameterizedTest + @CsvSource({ + "comprehensive-cohort.pb, PROTOBUF", + "comprehensive-family.pb, PROTOBUF", + "comprehensive-phenopacket.pb, PROTOBUF", + "covid.json, JSON", + "covid.yml, YAML", + }) + public void sniff(String fileName, PhenopacketFormat expected) throws Exception { + byte[] payload = readAllBytes(fileName); + PhenopacketFormat format; + format = FormatSniffer.sniff(payload); + + assertThat(format, equalTo(expected)); + } + + private static byte[] readAllBytes(String fileName) throws IOException { + try (InputStream is = new BufferedInputStream(Files.newInputStream(BASE_DIR.resolve(fileName)))) { + return is.readAllBytes(); + } + } +} \ No newline at end of file diff --git a/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-cohort.pb b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-cohort.pb 
new file mode 100644 index 00000000..5c57f5ee --- /dev/null +++ b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-cohort.pb @@ -0,0 +1,80 @@ + +comprehensive-cohort-id$A description of the example cohort.ò +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandÀÄd" +P14Y08B +NCBITaxon:9606 homo sapiensï& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ß/ + +HP:0031910!Abnormal cranial nerve physiologyR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ò# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset"– + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJÁ + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0"– +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F*Á + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-family.pb b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-family.pb new file mode 100644 index 00000000..fb131057 --- /dev/null +++ b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-family.pb @@ -0,0 +1,83 @@ + +comprehensive-family-idò +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandÀÄd" +P14Y08B +NCBITaxon:9606 homo sapiensï& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ß/ + +HP:0031910!Abnormal cranial nerve physiologyR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ò# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset"– + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJÁ + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.  + +MOTHER0  + +FATHER0"C +%14 year-old boyFATHER"MOTHER(0 + MOTHER(0 + FATHER(0*– +file://data/genomes/FAM000001"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001C* +MOTHERP000001M* +FATHERP000001F2Á + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-phenopacket.pb b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-phenopacket.pb new file mode 100644 index 00000000..8fe66cc2 --- /dev/null +++ b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/comprehensive-phenopacket.pb @@ -0,0 +1,60 @@ + +comprehensive-phenopacket-id\ +14 year-old boyboypatientprobandÀÄd" +P14Y08B +NCBITaxon:9606 homo sapiensï& + +HP:0001558Decreased fetal movementJ + +HP:0011461 Fetal onsetR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ß/ + +HP:0031910!Abnormal cranial nerve physiologyR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.ò# + +HP:0011463Macroscopic hematuria* + +HP:0031796 Recurrent2 +P14YR© +@ + ECO:00000331author statement supported by traceable referencee + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report.N + +HP:0001270 Motor delay" + +HP:0012825MildJ + +HP:0011463Childhood onset"– + biosample-id14 year-old boy Muscle biopsy of 14 year-old boy"! 
+UBERON:0003403skin of forearm2 +NCBITaxon:9606 homo sapiens: +P14YJ + NCIT:C38757Negative FindingR + +NCIT:C3677Benign NeoplasmZ& + NCIT:C28076Disease Grade Qualifierb + NCIT:C68748HER2/Neu Positive* + +HGNC1:3688FGFR126NM_001848.2:c.877G>A2 + GENO:0000135 heterozygous:B + + OMIM:101600PFEIFFER SYNDROME" + +HP:0003577Congenital onsetBm +file://data/genomes/P000001C"Whole genome sequencing VCF output" +GRCh38.p13* +14 year-old boyP000001CJÁ + +¨ ì™ÀÄPeter R.PhenopacketLab"y +hphuman phenotype ontology%http://purl.obolibrary.org/obo/hp.owl" +2018-03-08*HP2"http://purl.obolibrary.org/obo/HP_"z +genoGenotype Ontology'http://purl.obolibrary.org/obo/geno.owl" +19-03-2018*GENO2$http://purl.obolibrary.org/obo/GENO_"< +pubmedPubMed*PMID2$https://www.ncbi.nlm.nih.gov/pubmed/"v +ncit NCI Thesaurus'http://purl.obolibrary.org/obo/ncit.owl" +20-03-2020*NCIT2$http://purl.obolibrary.org/obo/NCIT_21.0.0:e + PMID:30808312TCOL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report. 
\ No newline at end of file diff --git a/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.json b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.json new file mode 100644 index 00000000..21125cb9 --- /dev/null +++ b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.json @@ -0,0 +1,317 @@ +{ + "id": "arbitrary.phenopacket.id", + "subject": { + "id": "P123542", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P70Y" + } + }, + "vitalStatus": { + "status": "DECEASED", + "causeOfDeath": { + "id": "MONDO:0100096", + "label": "COVID-19" + } + }, + "sex": "MALE" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0001945", + "label": "Fever " + }, + "onset": { + "timestamp": "2021-02-01T05:00:00Z" + } + }, { + "type": { + "id": "HP:0030157", + "label": "Flank pain" + }, + "onset": { + "timestamp": "2021-02-01T05:00:00Z" + } + }, { + "type": { + "id": "HP:0000790", + "label": "Hematuria" + }, + "onset": { + "timestamp": "2021-02-01T05:00:00Z" + } + }, { + "type": { + "id": "HP:0012625", + "label": "Stage 3 chronic kidney disease" + }, + "onset": { + "timestamp": "2021-02-01T05:00:00Z" + } + }, { + "type": { + "id": "HP:0003326", + "label": "Myalgia" + }, + "onset": { + "interval": { + "start": "2020-03-18T00:00:00Z", + "end": "2020-03-20T00:00:00Z" + } + } + }, { + "type": { + "id": "HP:0002014", + "label": "Diarrhea" + }, + "onset": { + "interval": { + "start": "2020-03-18T00:00:00Z", + "end": "2020-03-20T00:00:00Z" + } + } + }, { + "type": { + "id": "HP:0002094", + "label": "Dyspnea" + }, + "onset": { + "interval": { + "start": "2020-03-18T00:00:00Z", + "end": "2020-03-20T00:00:00Z" + } + } + }, { + "type": { + "id": "HP:0033677", + "label": "Acute respiratory distress syndrome" + }, + "onset": { + "timestamp": "2020-03-20T00:00:00Z" + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:26474-7", + "label": "Lymphocytes 
[#/volume] in Blood" + }, + "value": { + "quantity": { + "unit": { + "id": "NCIT:C67245", + "label": "Thousand Cells" + }, + "value": 1.4 + } + }, + "timeObserved": { + "interval": { + "start": "2019-09-01T00:00:00Z", + "end": "2020-03-01T00:00:00Z" + } + } + }, { + "assay": { + "id": "LOINC:26474-7", + "label": "Lymphocytes [#/volume] in Blood" + }, + "value": { + "quantity": { + "unit": { + "id": "NCIT:C67245", + "label": "Thousand Cells" + }, + "value": 0.7 + } + }, + "timeObserved": { + "timestamp": "2020-03-20T00:00:00Z" + } + }], + "diseases": [{ + "term": { + "id": "MONDO:0005015", + "label": "diabetes mellitus" + }, + "excluded": true + }, { + "term": { + "id": "MONDO:0004994", + "label": "cardiomyopathy" + } + }, { + "term": { + "id": "MONDO:0100096", + "label": "COVID-19" + }, + "onset": { + "timestamp": "2020-03-17T00:00:00Z" + } + }], + "medicalActions": [{ + "procedure": { + "code": { + "id": "NCIT:C80473", + "label": "Left Ventricular Assist Device" + }, + "performed": { + "timestamp": "2016-01-01T00:00:00Z" + } + } + }, { + "treatment": { + "agent": { + "id": "NCIT:C722", + "label": "Oxygen" + }, + "routeOfAdministration": { + "id": "NCIT:C38284", + "label": "Nasal Route of Administration" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "NCIT:C67388", + "label": "Liter per Minute" + }, + "value": 2.0 + }, + "scheduleFrequency": { + "id": "PATO:0000689", + "label": "continuous" + }, + "interval": { + "start": "2021-02-01T18:58:43Z", + "end": "2021-02-02T08:22:42Z" + } + }, { + "quantity": { + "unit": { + "id": "NCIT:C67388", + "label": "Liter per Minute" + }, + "value": 50.0 + }, + "scheduleFrequency": { + "id": "PATO:0000689", + "label": "continuous" + }, + "interval": { + "start": "2021-02-02T08:22:42Z", + "end": "2021-02-02T12:22:42Z" + } + }] + } + }, { + "treatment": { + "agent": { + "id": "CHEBI:41879", + "label": "dexamethasone" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "UO:0000022", + "label": "milligram" + 
}, + "value": 6.0 + }, + "scheduleFrequency": { + "id": "NCIT:C125004", + "label": "Once Daily" + }, + "interval": { + "start": "2020-03-20T00:00:00Z", + "end": "2020-03-30T00:00:00Z" + } + }] + } + }, { + "procedure": { + "code": { + "id": "NCIT:C116648", + "label": "Tracheal Intubation" + }, + "performed": { + "timestamp": "2020-03-22T00:00:00Z" + } + } + }, { + "treatment": { + "agent": { + "id": "NCIT:C722", + "label": "Oxygen" + }, + "routeOfAdministration": { + "id": "NCIT:C50254", + "label": "Positive end Expiratory Pressure Valve Device" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "NCIT:C91060", + "label": "Centimeters of Water" + }, + "value": 14.0 + }, + "scheduleFrequency": { + "id": "PATO:0000689", + "label": "continuous" + }, + "interval": { + "start": "2020-03-22T00:00:00Z", + "end": "2020-03-28T00:00:00Z" + } + }] + } + }, { + "treatment": { + "agent": { + "id": "NCIT:C84217", + "label": "Tocilizumab" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "NCIT:C124458", + "label": "Milligram per Kilogram per Dose" + }, + "value": 4.0 + }, + "scheduleFrequency": { + "id": "NCIT:C64529", + "label": "Every Four Weeks" + }, + "interval": { + "start": "2020-03-24T00:00:00Z", + "end": "2020-03-28T00:00:00Z" + } + }] + } + }], + "metaData": { + "created": "2021-08-17T00:00:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "2019-11-26", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, { + "id": "mondo", + "name": "Mondo Disease Ontology", + "url": "http://purl.obolibrary.org/obo/mondo.obo", + "version": "2021-11-26", + "namespacePrefix": "MONDO", + "iriPrefix": "http://purl.obolibrary.org/obo/MONDO_" + }], + "phenopacketSchemaVersion": "2.0", + "externalReferences": [{ + "id": "DOI:10.1016/j.jaccas.2020.04.001", + "reference": "PMID:32292915", + "description": "The 
Imperfect Cytokine Storm: Severe COVID-19 With ARDS in a Patient on Durable LVAD Support" + }] + } +} \ No newline at end of file diff --git a/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.yml b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.yml new file mode 100644 index 00000000..5a00cd72 --- /dev/null +++ b/phenopacket-tools-util/src/test/resources/org/phenopackets/phenopackettools/util/format/covid.yml @@ -0,0 +1,214 @@ +phenopacket: + id: "arbitrary.phenopacket.id" + subject: + id: "P123542" + timeAtLastEncounter: + age: + iso8601duration: "P70Y" + vitalStatus: + status: "DECEASED" + causeOfDeath: + id: "MONDO:0100096" + label: "COVID-19" + sex: "MALE" + phenotypicFeatures: + - type: + id: "HP:0001945" + label: "Fever " + onset: + timestamp: "2021-02-01T05:00:00Z" + - type: + id: "HP:0030157" + label: "Flank pain" + onset: + timestamp: "2021-02-01T05:00:00Z" + - type: + id: "HP:0000790" + label: "Hematuria" + onset: + timestamp: "2021-02-01T05:00:00Z" + - type: + id: "HP:0012625" + label: "Stage 3 chronic kidney disease" + onset: + timestamp: "2021-02-01T05:00:00Z" + - type: + id: "HP:0003326" + label: "Myalgia" + onset: + interval: + start: "2020-03-18T00:00:00Z" + end: "2020-03-20T00:00:00Z" + - type: + id: "HP:0002014" + label: "Diarrhea" + onset: + interval: + start: "2020-03-18T00:00:00Z" + end: "2020-03-20T00:00:00Z" + - type: + id: "HP:0002094" + label: "Dyspnea" + onset: + interval: + start: "2020-03-18T00:00:00Z" + end: "2020-03-20T00:00:00Z" + - type: + id: "HP:0033677" + label: "Acute respiratory distress syndrome" + onset: + timestamp: "2020-03-20T00:00:00Z" + measurements: + - assay: + id: "LOINC:26474-7" + label: "Lymphocytes [#/volume] in Blood" + value: + quantity: + unit: + id: "NCIT:C67245" + label: "Thousand Cells" + value: 1.4 + timeObserved: + interval: + start: "2019-09-01T00:00:00Z" + end: "2020-03-01T00:00:00Z" + - assay: + id: "LOINC:26474-7" 
+ label: "Lymphocytes [#/volume] in Blood" + value: + quantity: + unit: + id: "NCIT:C67245" + label: "Thousand Cells" + value: 0.7 + timeObserved: + timestamp: "2020-03-20T00:00:00Z" + diseases: + - term: + id: "MONDO:0005015" + label: "diabetes mellitus" + excluded: true + - term: + id: "MONDO:0004994" + label: "cardiomyopathy" + - term: + id: "MONDO:0100096" + label: "COVID-19" + onset: + timestamp: "2020-03-17T00:00:00Z" + medicalActions: + - procedure: + code: + id: "NCIT:C80473" + label: "Left Ventricular Assist Device" + performed: + timestamp: "2016-01-01T00:00:00Z" + - treatment: + agent: + id: "NCIT:C722" + label: "Oxygen" + routeOfAdministration: + id: "NCIT:C38284" + label: "Nasal Route of Administration" + doseIntervals: + - quantity: + unit: + id: "NCIT:C67388" + label: "Liter per Minute" + value: 2.0 + scheduleFrequency: + id: "PATO:0000689" + label: "continuous" + interval: + start: "2021-02-01T18:58:43Z" + end: "2021-02-02T08:22:42Z" + - quantity: + unit: + id: "NCIT:C67388" + label: "Liter per Minute" + value: 50.0 + scheduleFrequency: + id: "PATO:0000689" + label: "continuous" + interval: + start: "2021-02-02T08:22:42Z" + end: "2021-02-02T12:22:42Z" + - treatment: + agent: + id: "CHEBI:41879" + label: "dexamethasone" + doseIntervals: + - quantity: + unit: + id: "UO:0000022" + label: "milligram" + value: 6.0 + scheduleFrequency: + id: "NCIT:C125004" + label: "Once Daily" + interval: + start: "2020-03-20T00:00:00Z" + end: "2020-03-30T00:00:00Z" + - procedure: + code: + id: "NCIT:C116648" + label: "Tracheal Intubation" + performed: + timestamp: "2020-03-22T00:00:00Z" + - treatment: + agent: + id: "NCIT:C722" + label: "Oxygen" + routeOfAdministration: + id: "NCIT:C50254" + label: "Positive end Expiratory Pressure Valve Device" + doseIntervals: + - quantity: + unit: + id: "NCIT:C91060" + label: "Centimeters of Water" + value: 14.0 + scheduleFrequency: + id: "PATO:0000689" + label: "continuous" + interval: + start: "2020-03-22T00:00:00Z" + end: 
"2020-03-28T00:00:00Z" + - treatment: + agent: + id: "NCIT:C84217" + label: "Tocilizumab" + doseIntervals: + - quantity: + unit: + id: "NCIT:C124458" + label: "Milligram per Kilogram per Dose" + value: 4.0 + scheduleFrequency: + id: "NCIT:C64529" + label: "Every Four Weeks" + interval: + start: "2020-03-24T00:00:00Z" + end: "2020-03-28T00:00:00Z" + metaData: + created: "2021-08-17T00:00:00Z" + createdBy: "anonymous biocurator" + resources: + - id: "ncit" + name: "NCI Thesaurus" + url: "http://purl.obolibrary.org/obo/ncit.owl" + version: "2019-11-26" + namespacePrefix: "NCIT" + iriPrefix: "http://purl.obolibrary.org/obo/NCIT_" + - id: "mondo" + name: "Mondo Disease Ontology" + url: "http://purl.obolibrary.org/obo/mondo.obo" + version: "2021-11-26" + namespacePrefix: "MONDO" + iriPrefix: "http://purl.obolibrary.org/obo/MONDO_" + phenopacketSchemaVersion: "2.0" + externalReferences: + - id: "DOI:10.1016/j.jaccas.2020.04.001" + reference: "PMID:32292915" + description: "The Imperfect Cytokine Storm: Severe COVID-19 With ARDS in a Patient\ + \ on Durable LVAD Support" diff --git a/phenopacket-tools-validator-jsonschema/pom.xml b/phenopacket-tools-validator-jsonschema/pom.xml index da9fdd48..d5e2171d 100644 --- a/phenopacket-tools-validator-jsonschema/pom.xml +++ b/phenopacket-tools-validator-jsonschema/pom.xml @@ -16,6 +16,11 @@ JSON schema validator utilities for phenopackets + + org.phenopackets.phenopackettools + phenopacket-tools-util + ${project.parent.version} + org.phenopackets.phenopackettools phenopacket-tools-validator-core diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java index 0f12e162..092a770f 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/module-info.java @@ -1,4 +1,5 @@ module org.phenopackets.phenopackettools.validator.jsonschema { + requires 
org.phenopackets.phenopackettools.util; requires transitive org.phenopackets.phenopackettools.validator.core; requires org.phenopackets.schema; requires com.google.protobuf.util; diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java index 7add0891..28918f30 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java +++ b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunner.java @@ -4,6 +4,9 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.protobuf.MessageOrBuilder; +import org.phenopackets.phenopackettools.util.format.FormatSniffer; +import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.phenopackets.phenopackettools.util.format.FormatSniffException; import org.phenopackets.phenopackettools.validator.core.*; import org.phenopackets.phenopackettools.validator.jsonschema.impl.JsonSchemaValidator; import org.phenopackets.schema.v2.CohortOrBuilder; @@ -159,11 +162,14 @@ public ValidationResults validate(T item) { } private String parseToString(byte[] payload) throws ConversionException { - if (Util.looksLikeJson(payload)) { - return new String(payload); - } else { - // Must be protobuf bytes, otherwise we explode. 
- return converter.toJson(payload); + try { + PhenopacketFormat format = FormatSniffer.sniff(payload); + return switch (format) { + case JSON, YAML -> new String(payload); + case PROTOBUF -> converter.toJson(payload); + }; + } catch (FormatSniffException e) { + throw new ConversionException(e); } } diff --git a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/Util.java b/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/Util.java deleted file mode 100644 index b8fb5676..00000000 --- a/phenopacket-tools-validator-jsonschema/src/main/java/org/phenopackets/phenopackettools/validator/jsonschema/Util.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.phenopackets.phenopackettools.validator.jsonschema; - -class Util { - - private Util() { - // static utility class - } - - static boolean looksLikeJson(byte[] payload) { - byte[] first32bytes = new byte[32]; - System.arraycopy(payload, 0, first32bytes, 0, first32bytes.length); - return looksLikeJson(new String(first32bytes)); - } - - static boolean looksLikeJson(String head) { - return head.replace("\\W+", "").startsWith("{"); - } -} diff --git a/pom.xml b/pom.xml index 58d78b8a..36531824 100644 --- a/pom.xml +++ b/pom.xml @@ -16,6 +16,7 @@ phenopacket-tools-test + phenopacket-tools-util phenopacket-tools-builder phenopacket-tools-validator-core phenopacket-tools-validator-jsonschema From bc3401e093bee1c7d812487580ff92f66e638fad Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 5 Oct 2022 20:45:39 -0400 Subject: [PATCH 04/20] Add `Gender` constants. 
Signed-off-by: Daniel Danis --- .../builder/constants/Gender.java | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java new file mode 100644 index 00000000..d3174f70 --- /dev/null +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/constants/Gender.java @@ -0,0 +1,25 @@ +package org.phenopackets.phenopackettools.builder.constants; + +import org.phenopackets.phenopackettools.builder.builders.OntologyClassBuilder; +import org.phenopackets.schema.v2.core.OntologyClass; + +public class Gender { + + private static final OntologyClass IDENTIFIES_AS_MALE = OntologyClassBuilder.ontologyClass("LOINC:LA22878-5", "Identifies as male"); + private static final OntologyClass IDENTIFIES_AS_FEMALE = OntologyClassBuilder.ontologyClass("LOINC:LA22879-3", "Identifies as female"); + private static final OntologyClass FEMALE_TO_MALE_TRANSSEXUAL = OntologyClassBuilder.ontologyClass("LOINC:LA22880-1", "Female-to-male transsexual"); + private static final OntologyClass MALE_TO_FEMALE_TRANSSEXUAL = OntologyClassBuilder.ontologyClass("LOINC:LA22881-9", "Male-to-female transsexual"); + private static final OntologyClass IDENTIFIES_AS_NON_CONFORMING = OntologyClassBuilder.ontologyClass("LOINC:LA22882-7", "Identifies as non-conforming"); + private static final OntologyClass OTHER_GENDER = OntologyClassBuilder.ontologyClass("LOINC:LA46-8", "other"); + private static final OntologyClass ASKED_BUT_UNKNOWN = OntologyClassBuilder.ontologyClass("LOINC:LA20384-6", "Asked but unknown"); + + + public static OntologyClass identifiesAsMale() { return IDENTIFIES_AS_MALE; } + public static OntologyClass identifiesAsFemale() { 
return IDENTIFIES_AS_FEMALE; } + public static OntologyClass femaleToMaleTranssexual() { return FEMALE_TO_MALE_TRANSSEXUAL; } + public static OntologyClass maleToFemaleTranssexual() { return MALE_TO_FEMALE_TRANSSEXUAL; } + public static OntologyClass identifiesAsNonConforming() { return IDENTIFIES_AS_NON_CONFORMING; } + public static OntologyClass otherGender() { return OTHER_GENDER; } + public static OntologyClass askedButUnknown() { return ASKED_BUT_UNKNOWN; } + +} From 2444e62593bf11ffb3d472e2f1f393583ac73cdc Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 7 Oct 2022 18:37:07 -0400 Subject: [PATCH 05/20] Setup common CLI options and I/O workflow. Signed-off-by: Daniel Danis --- .../command/BasePTCommand.java | 166 ++++++++++++++++++ .../command/ConvertCommand.java | 102 +++++------ .../src/main/resources/logback.xml | 9 +- .../command/BasePTCommandTest.java | 16 ++ .../util/format/PhenopacketFormat.java | 9 + 5 files changed, 242 insertions(+), 60 deletions(-) create mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasePTCommand.java create mode 100644 phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasePTCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasePTCommand.java new file mode 100644 index 00000000..161598ab --- /dev/null +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasePTCommand.java @@ -0,0 +1,166 @@ +package org.phenopackets.phenopackettools.command; + +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.util.format.FormatSniffException; +import org.phenopackets.phenopackettools.util.format.FormatSniffer; +import org.phenopackets.phenopackettools.util.format.PhenopacketElement; +import 
org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.phenopackets.schema.v1.Cohort; +import org.phenopackets.schema.v1.Family; +import org.phenopackets.schema.v1.Phenopacket; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.Callable; + +/** + * Base command that defines routines for reading/writing input/output + * as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s for commands that process + * a single top-level Phenopacket schema element. + */ +public abstract class BasePTCommand implements Callable { + + private static final Logger LOGGER = LoggerFactory.getLogger(BasePTCommand.class); + + @CommandLine.Option(names = {"-i", "--input"}, + description = "Input phenopacket file (leave empty for STDIN)") + public Path input = null; + + @CommandLine.Option(names = {"-o", "--output"}, description = "Output file (leave empty for STDOUT)") + public Path output = null; + + // If the format is uninitialized, it will be sniffed. + @CommandLine.Option(names = {"-f", "--format"}, + description = "Phenopacket format (choose from {json,yaml,protobuf})") + public PhenopacketFormat format = null; + + @CommandLine.Option(names = {"-e", "--element"}, + description = "Top-level element (default: ${DEFAULT-VALUE})") + public PhenopacketElement element = PhenopacketElement.PHENOPACKET; + + protected Message readInputMessage() throws FormatSniffException, IOException { + InputStream is = null; + try { + is = openInput(); + if (format == null) + // Remember the provided or sniffed input format. 
+ format = parseFormat(is); + + return switch (format) { + case PROTOBUF -> { + LOGGER.debug("Reading protobuf message"); + yield switch (element) { + case PHENOPACKET -> Phenopacket.parseFrom(is); + case FAMILY -> Family.parseFrom(is); + case COHORT -> Cohort.parseFrom(is); + }; + } + case JSON -> { + LOGGER.debug("Reading JSON message"); + BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + Message.Builder builder = prepareBuilder(element); + JsonFormat.parser().merge(reader, builder); + yield builder.build(); + } + case YAML -> { + // TODO - implement + throw new RuntimeException("YAML parser is not yet implemented"); + } + }; + } finally { + if (is != null && is != System.in) + is.close(); + } + } + + protected void writeV2Message(Message message, PhenopacketFormat format) throws IOException { + OutputStream os = null; + try { + os = openOutput(); + switch (format) { + case PROTOBUF -> { + LOGGER.debug("Writing protobuf message"); + message.writeTo(os); + } + case JSON -> { + LOGGER.debug("Writing JSON message"); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + JsonFormat.printer().appendTo(message, writer); + writer.flush(); + } + case YAML -> { + // TODO - implement + throw new RuntimeException("YAML printer is not yet implemented"); + } + } + } finally { + if (os != null && os != System.out) + os.close(); + } + } + + private InputStream openInput() throws IOException { + if (input == null) { + return System.in; + } else { + if (!Files.isRegularFile(input)) { + System.err.printf("The input file %s does not exist!%n", input.toAbsolutePath()); + System.exit(1); + } + LOGGER.info("Reading input from {}", input.toAbsolutePath()); + return new BufferedInputStream(Files.newInputStream(input)); + } + } + + private OutputStream openOutput() throws IOException { + if (output == null) { + // Write to STDOUT + return System.out; + } else { + Path parent = output.getParent(); + if (Files.isRegularFile(parent)) { + 
System.err.printf("The parent %s is a file%n", parent.toAbsolutePath()); + System.exit(1); + } + + if (!Files.isDirectory(parent)) + createParentDirectoriesOrExit(parent); + + LOGGER.info("Writing the output to {}", output.toAbsolutePath()); + return new BufferedOutputStream(Files.newOutputStream(output)); + } + } + + private static void createParentDirectoriesOrExit(Path parent) { + try { + LOGGER.info("Creating non-existing parent directories.."); + Files.createDirectories(parent); + } catch (IOException e) { + System.err.printf("Tried to create non-existent parent directories for %s but failed: %s%n", parent.toAbsolutePath(), e.getMessage()); + System.exit(1); + } + } + + private PhenopacketFormat parseFormat(InputStream is) throws IOException, FormatSniffException { + if (format == null) { + LOGGER.info("Input format was not provided, making an educated guess.."); + PhenopacketFormat fmt = FormatSniffer.sniff(is); + LOGGER.info("The input looks like a {} file", fmt); + return fmt; + } + return format; + } + + private static Message.Builder prepareBuilder(PhenopacketElement element) { + return switch (element) { + case PHENOPACKET -> Phenopacket.newBuilder(); + case FAMILY -> Family.newBuilder(); + case COHORT -> Cohort.newBuilder(); + }; + } +} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index 4d9dd9b6..ecbdb7fd 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -1,92 +1,82 @@ package org.phenopackets.phenopackettools.command; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.util.JsonFormat; +import com.google.protobuf.Message; import 
org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter; -import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.phenopackettools.util.format.FormatSniffException; +import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.phenopackets.schema.v1.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine.Command; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.concurrent.Callable; +import java.io.*; import static picocli.CommandLine.Option; -import static picocli.CommandLine.Parameters; @Command(name = "convert", mixinStandardHelpOptions = true, description = "Convert a v1.0 phenopacket to a v2.0 phenopacket.", footer = "Beware this process could be lossy!") -public class ConvertCommand implements Callable { +public class ConvertCommand extends BasePTCommand { - @Parameters(index = "0", arity = "1", description = "Input phenopacket file") - private Path input; + private static final Logger LOGGER = LoggerFactory.getLogger(ConvertCommand.class); - @Option(names = {"-o", "--output"}, description = "Output file") - public Path output = null; + @Option(names = {"--out-format"}, + description = "Output format (default: input format)") + public PhenopacketFormat outputFormat = null; - @Option(names = {"-ov","--out-version"}, description = "Version to convert to (default: ${DEFAULT-VALUE})") + @Option(names = {"-ov", "--out-version"}, + description = "Version to convert to (default: ${DEFAULT-VALUE})") public String outVersion = "2.0"; - @Option(names = {"--convert-variants"}, description = "Convert variant data (default: ${DEFAULT-VALUE})") + @Option(names = {"--convert-variants"}, + description = "Convert variant data (default: ${DEFAULT-VALUE})") public boolean convertVariants = false; + @Override public Integer call() { - if (!Files.isRegularFile(input)) { - 
System.err.println("Error! No such input file: " + input.toAbsolutePath()); + // (0) Check the inputs. + if (!outVersion.matches("2(\\.0)?(\\.0)?")) { + System.err.printf("Conversion to %s is not supported%n", outVersion); return 1; } - var builder = org.phenopackets.schema.v1.Phenopacket.newBuilder(); - try (BufferedReader reader = Files.newBufferedReader(input)) { - JsonFormat.parser().merge(reader, builder); - } catch (IOException e) { - System.err.println("Error! Unable to read input file, " + e.getMessage() + "\nPlease check the format of file " + input.toAbsolutePath()); + + // (1) Read the input v1 message. + Message message; + try { + message = readInputMessage(); + } catch (FormatSniffException e) { + System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); return 1; - } - var v1Phenopacket = builder.build(); - var inputFileVersion = v1Phenopacket.getMetaData().getPhenopacketSchemaVersion(); - if (!(inputFileVersion.equals("1.0") || inputFileVersion.equals("1.0.0"))) { - System.err.println("Error! This script converts version 1.0 to version 2.0 but the input file has version \"" + inputFileVersion + "\"."); + } catch (IOException e) { + System.err.println("Unable to read input file, " + e.getMessage() + "\nPlease check the format of file " + input.toAbsolutePath()); return 1; } + // (2) Convert into v2 format + if (convertVariants) + LOGGER.debug("Converting variants"); + V1ToV2Converter converter = V1ToV2Converter.of(convertVariants); - Phenopacket v2 = converter.convertPhenopacket(v1Phenopacket); + Message v2 = switch (element) { + case PHENOPACKET -> converter.convertPhenopacket((Phenopacket) message); + case FAMILY -> converter.convertFamily((Family) message); + case COHORT -> converter.convertCohort((Cohort) message); + }; - String json; + // (3) Write v2 into output using either the input format or the selected output format. 
+ if (outputFormat == null) + outputFormat = format; try { - json = JsonFormat.printer().print(v2); - } catch (InvalidProtocolBufferException ex) { - System.err.println("Unable to convert v" + outVersion + " phenopacket to json. " + ex.getMessage()); + writeV2Message(v2, outputFormat); + } catch (IOException e) { + System.err.println("Could not write v" + outVersion + " phenopacket to file " + output + " : " + e.getMessage()); return 1; } - if (output == null) { - System.out.println(json); - } else { -// Path v2File = Objects.requireNonNullElseGet(output, () -> getV2FileName(input)); - try (BufferedWriter writer = Files.newBufferedWriter(output)) { - writer.write(json); - writer.newLine(); - } catch (IOException e) { - System.err.println("Could not write v" + outVersion + " phenopacket to file " + output + " : " + e.getMessage()); - return 1; - } - } + + // We're done! return 0; } - /** - * Add "-v2" to an appropriate place in the file name - * @param input filename (possibly path) of v1 phenopacket - * @return corresponding v2 filename (possibly path) - */ - private Path getV2FileName(Path input) { - String inputFileName = input.getFileName().toString(); - String v2FileName = inputFileName.contains(".") ? 
inputFileName.replace(".", "-v2.") : inputFileName + "-v2"; - return Path.of(v2FileName); - } } diff --git a/phenopacket-tools-cli/src/main/resources/logback.xml b/phenopacket-tools-cli/src/main/resources/logback.xml index 81b18e1d..7b5f00ec 100644 --- a/phenopacket-tools-cli/src/main/resources/logback.xml +++ b/phenopacket-tools-cli/src/main/resources/logback.xml @@ -2,10 +2,11 @@ - + - DEBUG + INFO + System.err ${pattern} @@ -14,7 +15,7 @@ - - + + \ No newline at end of file diff --git a/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java b/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java new file mode 100644 index 00000000..9074843a --- /dev/null +++ b/phenopacket-tools-cli/src/test/java/org/phenopackets/phenopackettools/command/BasePTCommandTest.java @@ -0,0 +1,16 @@ +package org.phenopackets.phenopackettools.command; + +import org.junit.jupiter.api.Test; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class BasePTCommandTest { + + @Test + public void markIsSupportedForStdin() { + // We need this functionality when sniffing format from STDIN. 
+ assertThat(System.in.markSupported(), equalTo(true)); + } + +} \ No newline at end of file diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java index 4227f81a..2a579c81 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java @@ -12,6 +12,15 @@ public enum PhenopacketFormat { JSON, YAML; + @Override + public String toString() { + return switch (this) { + case PROTOBUF -> "protobuf"; + case JSON -> "json"; + case YAML -> "yaml"; + }; + } + public static PhenopacketFormat parse(String value) { switch (value.toLowerCase()) { case "protobuf": From baffdb2ea6e4b11e5339cf0ff9d48f5e6416637a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 7 Oct 2022 20:58:18 -0400 Subject: [PATCH 06/20] Write to STDOUT only, improve CLI documentation, logging. Add banner. 
Signed-off-by: Daniel Danis --- .../phenopackets/phenopackettools/Main.java | 9 +- .../command/ConvertCommand.java | 21 ++--- .../command/ExamplesCommand.java | 1 + ....java => SingleItemProcessingCommand.java} | 91 ++++++------------- .../util/format/PhenopacketElement.java | 31 ++++++- .../util/format/PhenopacketFormat.java | 1 + 6 files changed, 75 insertions(+), 79 deletions(-) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/{BasePTCommand.java => SingleItemProcessingCommand.java} (59%) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java index aeecb27b..ef090f9d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java @@ -25,7 +25,14 @@ footer = Main.FOOTER) public class Main { - public static final String HEADER = "\nAn application for creating, converting and validating GA4GH phenopackets\n"; + private static final String BANNER = """ + __ __ __ __ __ + ___ / / ___ ___ ___ ___ ___ _____/ /_____ / /_ / /____ ___ / /__ + / _ \\/ _ \\/ -_) _ \\/ _ \\/ _ \\/ _ `/ __/ '_/ -_) __/ / __/ _ \\/ _ \\/ (_-< + / .__/_//_/\\__/_//_/\\___/ .__/\\_,_/\\__/_/\\_\\\\__/\\__/ \\__/\\___/\\___/_/___/ + /_/ /_/ + """; + public static final String HEADER = BANNER + "\nAn application for creating, converting and validating GA4GH phenopackets.\n"; public static final String VERSION = "phenopacket-tools v0.4.6-SNAPSHOT"; // Maximum number of characters in line of the usage message. 
diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index ecbdb7fd..d2049c2c 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -15,33 +15,24 @@ @Command(name = "convert", mixinStandardHelpOptions = true, + sortOptions = false, description = "Convert a v1.0 phenopacket to a v2.0 phenopacket.", footer = "Beware this process could be lossy!") -public class ConvertCommand extends BasePTCommand { +public class ConvertCommand extends SingleItemProcessingCommand { private static final Logger LOGGER = LoggerFactory.getLogger(ConvertCommand.class); - @Option(names = {"--out-format"}, - description = "Output format (default: input format)") + @Option(names = {"-o", "--output-format"}, + description = "Output format.%nDefault: input format") public PhenopacketFormat outputFormat = null; - @Option(names = {"-ov", "--out-version"}, - description = "Version to convert to (default: ${DEFAULT-VALUE})") - public String outVersion = "2.0"; - @Option(names = {"--convert-variants"}, - description = "Convert variant data (default: ${DEFAULT-VALUE})") + description = "Convert variant data.%nDefault: ${DEFAULT-VALUE}") public boolean convertVariants = false; @Override public Integer call() { - // (0) Check the inputs. - if (!outVersion.matches("2(\\.0)?(\\.0)?")) { - System.err.printf("Conversion to %s is not supported%n", outVersion); - return 1; - } - // (1) Read the input v1 message. 
Message message; try { @@ -71,7 +62,7 @@ public Integer call() { try { writeV2Message(v2, outputFormat); } catch (IOException e) { - System.err.println("Could not write v" + outVersion + " phenopacket to file " + output + " : " + e.getMessage()); + System.err.println("Could not write phenopacket: " + e.getMessage()); return 1; } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java index 1fa56f26..03250a2e 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java @@ -24,6 +24,7 @@ @Command(name = "examples", mixinStandardHelpOptions = true, + sortOptions = false, description = "Write example phenopackets to a directory.") public class ExamplesCommand implements Callable { diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasePTCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java similarity index 59% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasePTCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java index 161598ab..39e6656a 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasePTCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java @@ -19,29 +19,26 @@ import java.util.concurrent.Callable; /** - * Base command that defines routines for reading/writing input/output - * as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s for commands that process - * a single top-level Phenopacket 
schema element. + * A command that provides routines for reading/writing input/output + * as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s + * for processing of a single top-level Phenopacket schema element. */ -public abstract class BasePTCommand implements Callable { +public abstract class SingleItemProcessingCommand implements Callable { - private static final Logger LOGGER = LoggerFactory.getLogger(BasePTCommand.class); + private static final Logger LOGGER = LoggerFactory.getLogger(SingleItemProcessingCommand.class); @CommandLine.Option(names = {"-i", "--input"}, - description = "Input phenopacket file (leave empty for STDIN)") + description = "Input phenopacket.%nLeave empty for STDIN") public Path input = null; - @CommandLine.Option(names = {"-o", "--output"}, description = "Output file (leave empty for STDOUT)") - public Path output = null; - // If the format is uninitialized, it will be sniffed. @CommandLine.Option(names = {"-f", "--format"}, - description = "Phenopacket format (choose from {json,yaml,protobuf})") + description = "Phenopacket format.%nChoose from: {${COMPLETION-CANDIDATES}}") public PhenopacketFormat format = null; @CommandLine.Option(names = {"-e", "--element"}, - description = "Top-level element (default: ${DEFAULT-VALUE})") - public PhenopacketElement element = PhenopacketElement.PHENOPACKET; + description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") + public PhenopacketElement element = null; protected Message readInputMessage() throws FormatSniffException, IOException { InputStream is = null; @@ -51,6 +48,11 @@ protected Message readInputMessage() throws FormatSniffException, IOException { // Remember the provided or sniffed input format. format = parseFormat(is); + if (element == null) { + LOGGER.info("Input element type was not provided, assuming phenopacket.. 
"); + element = PhenopacketElement.PHENOPACKET; + } + return switch (format) { case PROTOBUF -> { LOGGER.debug("Reading protobuf message"); @@ -79,28 +81,22 @@ yield switch (element) { } protected void writeV2Message(Message message, PhenopacketFormat format) throws IOException { - OutputStream os = null; - try { - os = openOutput(); - switch (format) { - case PROTOBUF -> { - LOGGER.debug("Writing protobuf message"); - message.writeTo(os); - } - case JSON -> { - LOGGER.debug("Writing JSON message"); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); - JsonFormat.printer().appendTo(message, writer); - writer.flush(); - } - case YAML -> { - // TODO - implement - throw new RuntimeException("YAML printer is not yet implemented"); - } + OutputStream os = System.out; + switch (format) { + case PROTOBUF -> { + LOGGER.debug("Writing protobuf message"); + message.writeTo(os); + } + case JSON -> { + LOGGER.debug("Writing JSON message"); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + JsonFormat.printer().appendTo(message, writer); + writer.flush(); + } + case YAML -> { + // TODO - implement + throw new RuntimeException("YAML printer is not yet implemented"); } - } finally { - if (os != null && os != System.out) - os.close(); } } @@ -117,35 +113,6 @@ private InputStream openInput() throws IOException { } } - private OutputStream openOutput() throws IOException { - if (output == null) { - // Write to STDOUT - return System.out; - } else { - Path parent = output.getParent(); - if (Files.isRegularFile(parent)) { - System.err.printf("The parent %s is a file%n", parent.toAbsolutePath()); - System.exit(1); - } - - if (!Files.isDirectory(parent)) - createParentDirectoriesOrExit(parent); - - LOGGER.info("Writing the output to {}", output.toAbsolutePath()); - return new BufferedOutputStream(Files.newOutputStream(output)); - } - } - - private static void createParentDirectoriesOrExit(Path parent) { - try { - LOGGER.info("Creating 
non-existing parent directories.."); - Files.createDirectories(parent); - } catch (IOException e) { - System.err.printf("Tried to create non-existent parent directories for %s but failed: %s%n", parent.toAbsolutePath(), e.getMessage()); - System.exit(1); - } - } - private PhenopacketFormat parseFormat(InputStream is) throws IOException, FormatSniffException { if (format == null) { LOGGER.info("Input format was not provided, making an educated guess.."); diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java index 46e66f82..852c8ddc 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketElement.java @@ -1,5 +1,8 @@ package org.phenopackets.phenopackettools.util.format; +import java.util.Arrays; +import java.util.stream.Collectors; + /** * The enum members represent the top-level elements of the Phenopacket schema. 
*/ @@ -7,6 +10,32 @@ public enum PhenopacketElement { PHENOPACKET, FAMILY, - COHORT + COHORT; + + @Override + public String toString() { + return switch (this) { + case PHENOPACKET -> "phenopacket"; + case FAMILY -> "family"; + case COHORT -> "cohort"; + }; + } + + public static PhenopacketElement parse(String value) { + switch (value.toLowerCase()) { + case "phenopacket": + return PHENOPACKET; + case "family": + return FAMILY; + case "cohort": + return COHORT; + default: + String expected = String.join(Arrays.stream(PhenopacketElement.values()) + .map(Enum::name) + .map(String::toLowerCase) + .collect(Collectors.joining(", ", "{", "}"))); + throw new IllegalArgumentException("Expected one of %s but got %s".formatted(expected, value)); + } + } } diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java index 2a579c81..e34f4497 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java @@ -24,6 +24,7 @@ public String toString() { public static PhenopacketFormat parse(String value) { switch (value.toLowerCase()) { case "protobuf": + case "pb": return PROTOBUF; case "json": return JSON; From c0d151b16882b69ecc456ca18bb3ef06d40b12f3 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 10 Oct 2022 13:47:28 -0400 Subject: [PATCH 07/20] Improve Javadocs, improve logging. 
Signed-off-by: Daniel Danis --- .../command/ConvertCommand.java | 9 +++++--- .../command/SingleItemProcessingCommand.java | 22 +++++++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index d2049c2c..ad19a233 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -56,11 +56,14 @@ public Integer call() { case COHORT -> converter.convertCohort((Cohort) message); }; - // (3) Write v2 into output using either the input format or the selected output format. - if (outputFormat == null) + // (3) Write v2 into STDOUT using either the input format or the selected output format. + OutputStream alwaysTheStandardOutput = System.out; + if (outputFormat == null) { + LOGGER.info("Output format (-o | --output-format) not provided, writing data in the input format `{}`", format); outputFormat = format; + } try { - writeV2Message(v2, outputFormat); + writeMessage(v2, outputFormat, alwaysTheStandardOutput); } catch (IOException e) { System.err.println("Could not write phenopacket: " + e.getMessage()); return 1; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java index 39e6656a..68b14b38 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java @@ -40,6 +40,16 @@ public abstract class SingleItemProcessingCommand implements Callable { 
description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") public PhenopacketElement element = null; + /** + * Read the input {@link Message} either from the standard input or from the provided {@link #input}. + *

+ * The method uses {@link #format} and {@link #element} to decode the input. In absence of the {@link #format}, + * we make an educated guess (sniff) and throw a {@link FormatSniffException} if the sniffing fails. + * + * @return the parsed {@link Message}. + * @throws FormatSniffException if the format sniffing fails. + * @throws IOException in case of I/O errors. + */ protected Message readInputMessage() throws FormatSniffException, IOException { InputStream is = null; try { @@ -80,8 +90,16 @@ yield switch (element) { } } - protected void writeV2Message(Message message, PhenopacketFormat format) throws IOException { - OutputStream os = System.out; + /** + * Write the {@code message} in an appropriate {@code format} into the provided {@link OutputStream} {@code os}. + *

+ * Uses {@link } + * @param message message to be written out. + * @param format format to write out + * @param os where to write + * @throws IOException in case of I/O errors during the output + */ + protected static void writeMessage(Message message, PhenopacketFormat format, OutputStream os) throws IOException { switch (format) { case PROTOBUF -> { LOGGER.debug("Writing protobuf message"); From c72e440f667ce8a67f9a08efc011681605d13b7d Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 10 Oct 2022 14:00:00 -0400 Subject: [PATCH 08/20] Separate functions for reading and writing within the base command. Signed-off-by: Daniel Danis --- .../command/ConvertCommand.java | 2 +- .../command/SingleItemIOCommand.java | 46 +++++++++++++++++++ ...mmand.java => SingleItemInputCommand.java} | 38 ++------------- 3 files changed, 52 insertions(+), 34 deletions(-) create mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/{SingleItemProcessingCommand.java => SingleItemInputCommand.java} (77%) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index ad19a233..6b2bf62f 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -18,7 +18,7 @@ sortOptions = false, description = "Convert a v1.0 phenopacket to a v2.0 phenopacket.", footer = "Beware this process could be lossy!") -public class ConvertCommand extends SingleItemProcessingCommand { +public class ConvertCommand extends SingleItemIOCommand { private static final Logger LOGGER = LoggerFactory.getLogger(ConvertCommand.class); diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java new file mode 100644 index 00000000..354f05d2 --- /dev/null +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java @@ -0,0 +1,46 @@ +package org.phenopackets.phenopackettools.command; + +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; +import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; + +/** + * {@link SingleItemIOCommand} adds a writing routine to {@link SingleItemInputCommand}. + */ +public abstract class SingleItemIOCommand extends SingleItemInputCommand { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleItemIOCommand.class); + + /** + * Write the {@code message} in an appropriate {@code format} into the provided {@link OutputStream} {@code os}. + *

+ * Uses {@link } + * @param message message to be written out. + * @param format format to write out + * @param os where to write + * @throws IOException in case of I/O errors during the output + */ + protected static void writeMessage(Message message, PhenopacketFormat format, OutputStream os) throws IOException { + switch (format) { + case PROTOBUF -> { + LOGGER.debug("Writing protobuf message"); + message.writeTo(os); + } + case JSON -> { + LOGGER.debug("Writing JSON message"); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + JsonFormat.printer().appendTo(message, writer); + writer.flush(); + } + case YAML -> { + // TODO - implement + throw new RuntimeException("YAML printer is not yet implemented"); + } + } + } + +} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java similarity index 77% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java index 68b14b38..5d5d1c9c 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemProcessingCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java @@ -19,13 +19,12 @@ import java.util.concurrent.Callable; /** - * A command that provides routines for reading/writing input/output - * as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s + * A command that provides routines for reading as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s * for processing of a single top-level Phenopacket schema element. 
*/ -public abstract class SingleItemProcessingCommand implements Callable { +public abstract class SingleItemInputCommand implements Callable { - private static final Logger LOGGER = LoggerFactory.getLogger(SingleItemProcessingCommand.class); + private static final Logger LOGGER = LoggerFactory.getLogger(SingleItemInputCommand.class); @CommandLine.Option(names = {"-i", "--input"}, description = "Input phenopacket.%nLeave empty for STDIN") @@ -36,6 +35,7 @@ public abstract class SingleItemProcessingCommand implements Callable { description = "Phenopacket format.%nChoose from: {${COMPLETION-CANDIDATES}}") public PhenopacketFormat format = null; + // TODO - is it too hard to implement element sniffing? @CommandLine.Option(names = {"-e", "--element"}, description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") public PhenopacketElement element = null; @@ -48,7 +48,7 @@ public abstract class SingleItemProcessingCommand implements Callable { * * @return the parsed {@link Message}. * @throws FormatSniffException if the format sniffing fails. - * @throws IOException in case of I/O errors. + * @throws IOException in case of I/O errors. */ protected Message readInputMessage() throws FormatSniffException, IOException { InputStream is = null; @@ -90,34 +90,6 @@ yield switch (element) { } } - /** - * Write the {@code message} in an appropriate {@code format} into the provided {@link OutputStream} {@code os}. - *

- * Uses {@link } - * @param message message to be written out. - * @param format format to write out - * @param os where to write - * @throws IOException in case of I/O errors during the output - */ - protected static void writeMessage(Message message, PhenopacketFormat format, OutputStream os) throws IOException { - switch (format) { - case PROTOBUF -> { - LOGGER.debug("Writing protobuf message"); - message.writeTo(os); - } - case JSON -> { - LOGGER.debug("Writing JSON message"); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); - JsonFormat.printer().appendTo(message, writer); - writer.flush(); - } - case YAML -> { - // TODO - implement - throw new RuntimeException("YAML printer is not yet implemented"); - } - } - } - private InputStream openInput() throws IOException { if (input == null) { return System.in; From 40286740886344287540d73f7750bbd01b703a82 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Oct 2022 13:14:19 -0400 Subject: [PATCH 09/20] Draft `validation` command and output formatting. 
Signed-off-by: Daniel Danis --- phenopacket-tools-cli/pom.xml | 10 + .../src/main/java/module-info.java | 1 + .../phenopackets/phenopackettools/Main.java | 13 +- .../phenopackettools/command/BaseCommand.java | 46 ++++ .../command/BasicValidateCommand.java | 19 -- .../command/ConvertCommand.java | 15 +- .../command/SingleItemInputCommand.java | 106 +++++++--- .../command/ValidateCommand.java | 200 ++++++++++++++++++ .../phenopackettools/application.properties | 2 + .../phenopackets/phenopackettools/banner.txt | 7 + pom.xml | 5 + 11 files changed, 353 insertions(+), 71 deletions(-) create mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java delete mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasicValidateCommand.java create mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java create mode 100644 phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties create mode 100644 phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt diff --git a/phenopacket-tools-cli/pom.xml b/phenopacket-tools-cli/pom.xml index e68d7810..c93a16be 100644 --- a/phenopacket-tools-cli/pom.xml +++ b/phenopacket-tools-cli/pom.xml @@ -59,9 +59,19 @@ org.monarchinitiative.phenol phenol-io + + org.apache.commons + commons-csv + + + + src/main/resources + true + + org.springframework.boot diff --git a/phenopacket-tools-cli/src/main/java/module-info.java b/phenopacket-tools-cli/src/main/java/module-info.java index 1f4d50b6..dbb1b933 100644 --- a/phenopacket-tools-cli/src/main/java/module-info.java +++ b/phenopacket-tools-cli/src/main/java/module-info.java @@ -10,6 +10,7 @@ requires com.google.protobuf.util; requires com.fasterxml.jackson.databind; requires com.fasterxml.jackson.dataformat.yaml; + requires commons.csv; requires info.picocli; requires org.slf4j; diff --git 
a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java index ef090f9d..880926e1 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/Main.java @@ -1,6 +1,6 @@ package org.phenopackets.phenopackettools; -import org.phenopackets.phenopackettools.command.BasicValidateCommand; +import org.phenopackets.phenopackettools.command.ValidateCommand; import org.phenopackets.phenopackettools.command.ConvertCommand; import org.phenopackets.phenopackettools.command.ExamplesCommand; import picocli.AutoComplete; @@ -17,22 +17,15 @@ subcommands = { // see https://picocli.info/autocomplete.html AutoComplete.GenerateCompletion.class, - BasicValidateCommand.class, ConvertCommand.class, + ValidateCommand.class, ExamplesCommand.class, }, usageHelpWidth = Main.USAGE_WIDTH, footer = Main.FOOTER) public class Main { - private static final String BANNER = """ - __ __ __ __ __ - ___ / / ___ ___ ___ ___ ___ _____/ /_____ / /_ / /____ ___ / /__ - / _ \\/ _ \\/ -_) _ \\/ _ \\/ _ \\/ _ `/ __/ '_/ -_) __/ / __/ _ \\/ _ \\/ (_-< - / .__/_//_/\\__/_//_/\\___/ .__/\\_,_/\\__/_/\\_\\\\__/\\__/ \\__/\\___/\\___/_/___/ - /_/ /_/ - """; - public static final String HEADER = BANNER + "\nAn application for creating, converting and validating GA4GH phenopackets.\n"; + public static final String HEADER = "phenopacket-tools\nAn application for creating, converting and validating GA4GH phenopackets.\n"; public static final String VERSION = "phenopacket-tools v0.4.6-SNAPSHOT"; // Maximum number of characters in line of the usage message. 
diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java new file mode 100644 index 00000000..929fd50a --- /dev/null +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseCommand.java @@ -0,0 +1,46 @@ +package org.phenopackets.phenopackettools.command; + +import org.phenopackets.phenopackettools.Main; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; +import java.util.concurrent.Callable; + +public abstract class BaseCommand implements Callable { + + private static final Logger LOGGER = LoggerFactory.getLogger(BaseCommand.class); + + protected static final String BANNER = readBanner(); + + protected static final Properties APPLICATION_PROPERTIES = readApplicationProperties(); + + protected static final String PHENOPACKET_TOOLS_VERSION = APPLICATION_PROPERTIES.getProperty("phenopacket-tools.version", "UNKNOWN-version"); + + private static String readBanner() { + try (InputStream is = Main.class.getResourceAsStream("banner.txt")) { + return is == null ? "" : new String(is.readAllBytes()); + } catch (IOException e) { + LOGGER.error("Unable to read banner. Please report to the developers: {}", e.getMessage(), e); + return ""; + } + } + + private static Properties readApplicationProperties() { + Properties properties = new Properties(); + try (InputStream is = Main.class.getResourceAsStream("application.properties")) { + properties.load(is); + } catch (IOException e) { + // Complain and swallow. We are not stopping the entire app just for this. + LOGGER.error("Unable to read the application.properties file. 
Please report to the developers: {}", e.getMessage(), e); + } + return properties; + } + + protected static void printBanner() { + System.err.println(BANNER); + } + +} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasicValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasicValidateCommand.java deleted file mode 100644 index 94c7bd16..00000000 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BasicValidateCommand.java +++ /dev/null @@ -1,19 +0,0 @@ -package org.phenopackets.phenopackettools.command; - - -import org.phenopackets.phenopackettools.command.validate.ValidateCohortCommand; -import org.phenopackets.phenopackettools.command.validate.ValidateFamilyCommand; -import org.phenopackets.phenopackettools.command.validate.ValidatePhenopacketCommand; -import picocli.CommandLine.Command; - -@Command(name = "validate", - description = "Validate top-level elements of the Phenopacket schema.", - mixinStandardHelpOptions = true, - subcommands = { - ValidatePhenopacketCommand.class, - ValidateFamilyCommand.class, - ValidateCohortCommand.class - }) -public class BasicValidateCommand { - -} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index 6b2bf62f..6dfe2334 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -2,7 +2,6 @@ import com.google.protobuf.Message; import org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter; -import org.phenopackets.phenopackettools.util.format.FormatSniffException; import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; import 
org.phenopackets.schema.v1.*; import org.slf4j.Logger; @@ -33,17 +32,11 @@ public class ConvertCommand extends SingleItemIOCommand { @Override public Integer call() { + // (0) Print banner. + printBanner(); + // (1) Read the input v1 message. - Message message; - try { - message = readInputMessage(); - } catch (FormatSniffException e) { - System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); - return 1; - } catch (IOException e) { - System.err.println("Unable to read input file, " + e.getMessage() + "\nPlease check the format of file " + input.toAbsolutePath()); - return 1; - } + Message message = readMessageOrExit(PhenopacketSchemaVersion.V1); // (2) Convert into v2 format if (convertVariants) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java index 5d5d1c9c..a7a440c9 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java @@ -16,13 +16,12 @@ import java.io.*; import java.nio.file.Files; import java.nio.file.Path; -import java.util.concurrent.Callable; /** * A command that provides routines for reading as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s * for processing of a single top-level Phenopacket schema element. 
*/ -public abstract class SingleItemInputCommand implements Callable { +public abstract class SingleItemInputCommand extends BaseCommand { private static final Logger LOGGER = LoggerFactory.getLogger(SingleItemInputCommand.class); @@ -30,7 +29,7 @@ public abstract class SingleItemInputCommand implements Callable { description = "Input phenopacket.%nLeave empty for STDIN") public Path input = null; - // If the format is uninitialized, it will be sniffed. + // The format will be sniffed if it is uninitialized. @CommandLine.Option(names = {"-f", "--format"}, description = "Phenopacket format.%nChoose from: {${COMPLETION-CANDIDATES}}") public PhenopacketFormat format = null; @@ -40,6 +39,25 @@ public abstract class SingleItemInputCommand implements Callable { description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") public PhenopacketElement element = null; + /** + * Attempt to read the input in the provided {@code schemaVersion} and exit upon any failure. As a side effect, + * {@link #format} and {@link #element} fields are set after the function returns. + *

+ * Note that the function does not return if reading fails. + */ + protected Message readMessageOrExit(PhenopacketSchemaVersion schemaVersion) { + try { + return readInputMessage(schemaVersion); + } catch (FormatSniffException e) { + System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); + System.exit(1); + } catch (IOException e) { + System.err.println("Unable to read input file, " + e.getMessage() + "\nPlease check the format of file " + input.toAbsolutePath()); + System.exit(1); + } + return null; // Cannot happen but to make the compiler happy.. + } + /** * Read the input {@link Message} either from the standard input or from the provided {@link #input}. *

@@ -50,7 +68,7 @@ public abstract class SingleItemInputCommand implements Callable { * @throws FormatSniffException if the format sniffing fails. * @throws IOException in case of I/O errors. */ - protected Message readInputMessage() throws FormatSniffException, IOException { + private Message readInputMessage(PhenopacketSchemaVersion schemaVersion) throws FormatSniffException, IOException { InputStream is = null; try { is = openInput(); @@ -63,33 +81,62 @@ protected Message readInputMessage() throws FormatSniffException, IOException { element = PhenopacketElement.PHENOPACKET; } - return switch (format) { - case PROTOBUF -> { - LOGGER.debug("Reading protobuf message"); - yield switch (element) { - case PHENOPACKET -> Phenopacket.parseFrom(is); - case FAMILY -> Family.parseFrom(is); - case COHORT -> Cohort.parseFrom(is); - }; - } - case JSON -> { - LOGGER.debug("Reading JSON message"); - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); - Message.Builder builder = prepareBuilder(element); - JsonFormat.parser().merge(reader, builder); - yield builder.build(); - } - case YAML -> { - // TODO - implement - throw new RuntimeException("YAML parser is not yet implemented"); - } - }; + return parseMessage(schemaVersion, is); } finally { if (is != null && is != System.in) is.close(); } } + private Message parseMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { + return switch (format) { + case PROTOBUF -> readProtobufMessage(schemaVersion, is); + case JSON -> readJsonMessage(schemaVersion, is); + // TODO - implement YAML parsing + case YAML -> throw new RuntimeException("YAML parser is not yet implemented"); + }; + } + + private Message readProtobufMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { + LOGGER.debug("Reading protobuf message"); + return switch (schemaVersion) { + case V1 -> switch (element) { + case PHENOPACKET -> Phenopacket.parseFrom(is); + case FAMILY -> 
Family.parseFrom(is); + case COHORT -> Cohort.parseFrom(is); + }; + case V2 -> switch (element) { + + case PHENOPACKET -> org.phenopackets.schema.v2.Phenopacket.parseFrom(is); + case FAMILY -> org.phenopackets.schema.v2.Family.parseFrom(is); + case COHORT -> org.phenopackets.schema.v2.Cohort.parseFrom(is); + }; + }; + } + + private Message readJsonMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { + LOGGER.debug("Reading JSON message"); + BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + Message.Builder builder = prepareBuilder(schemaVersion, element); + JsonFormat.parser().merge(reader, builder); + return builder.build(); + } + + private static Message.Builder prepareBuilder(PhenopacketSchemaVersion schemaVersion, PhenopacketElement element) { + return switch (schemaVersion) { + case V1 -> switch (element) { + case PHENOPACKET -> org.phenopackets.schema.v1.Phenopacket.newBuilder(); + case FAMILY -> org.phenopackets.schema.v1.Family.newBuilder(); + case COHORT -> org.phenopackets.schema.v1.Cohort.newBuilder(); + }; + case V2 -> switch (element) { + case PHENOPACKET -> org.phenopackets.schema.v2.Phenopacket.newBuilder(); + case FAMILY -> org.phenopackets.schema.v2.Family.newBuilder(); + case COHORT -> org.phenopackets.schema.v2.Cohort.newBuilder(); + }; + }; + } + private InputStream openInput() throws IOException { if (input == null) { return System.in; @@ -113,11 +160,8 @@ private PhenopacketFormat parseFormat(InputStream is) throws IOException, Format return format; } - private static Message.Builder prepareBuilder(PhenopacketElement element) { - return switch (element) { - case PHENOPACKET -> Phenopacket.newBuilder(); - case FAMILY -> Family.newBuilder(); - case COHORT -> Cohort.newBuilder(); - }; + protected enum PhenopacketSchemaVersion { + V1, + V2; } } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java new file mode 100644 index 00000000..8fc55349 --- /dev/null +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java @@ -0,0 +1,200 @@ +package org.phenopackets.phenopackettools.command; + + +import com.google.protobuf.Message; +import com.google.protobuf.MessageOrBuilder; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.monarchinitiative.phenol.io.OntologyLoader; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.phenopackettools.validator.core.*; +import org.phenopackets.phenopackettools.validator.core.metadata.MetaDataValidators; +import org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators; +import org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner; +import org.phenopackets.schema.v2.CohortOrBuilder; +import org.phenopackets.schema.v2.FamilyOrBuilder; +import org.phenopackets.schema.v2.PhenopacketOrBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; +import picocli.CommandLine.Command; + +import java.io.*; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; + +@Command(name = "validate", + description = "Validate top-level elements of the Phenopacket schema.", + sortOptions = false, + mixinStandardHelpOptions = true) +public class ValidateCommand extends SingleItemInputCommand { + + private static final Logger LOGGER = LoggerFactory.getLogger(ValidateCommand.class); + + @CommandLine.Option(names = {"--require"}, + arity = "*", + description = "Path to JSON schema with additional requirements to enforce.") + protected List requirements = List.of(); + + @CommandLine.Option(names = "--hpo", + description 
= "Path to hp.json file") + protected Path hpJson; + + @Override + public Integer call() { + // (0) Print banner. + printBanner(); + + // (1) Read the input v2 message. + Message message = readMessageOrExit(PhenopacketSchemaVersion.V2); + + // (2) Set up the validator. + ValidationWorkflowRunner runner = prepareWorkflowRunner(); + + // (3) Validate the message(s). + ValidationResults results = runner.validate(message); + + // (4) Write out the validation results. + PrintStream printToStdoutSoFar = System.out; + return writeValidationResults(results, printToStdoutSoFar); + } + + private ValidationWorkflowRunner prepareWorkflowRunner() { + List customJsonSchemas = prepareCustomSchemaUrls(); + Object runner = switch (element) { + case PHENOPACKET -> { + List> semanticValidators = configureSemanticValidators(); + yield JsonSchemaValidationWorkflowRunner.phenopacketBuilder() + .addAllJsonSchemaUrls(customJsonSchemas) + .addSemanticValidator(MetaDataValidators.phenopacketValidator()) + .addAllSemanticValidators(semanticValidators) + .build(); + } + case FAMILY -> { + List> semanticValidators = configureSemanticValidators(); + yield JsonSchemaValidationWorkflowRunner.familyBuilder() + .addAllJsonSchemaUrls(customJsonSchemas) + .addSemanticValidator(MetaDataValidators.familyValidator()) + .addAllSemanticValidators(semanticValidators) + .build(); + } + case COHORT -> { + List> semanticValidators = configureSemanticValidators(); + yield JsonSchemaValidationWorkflowRunner.cohortBuilder() + .addAllJsonSchemaUrls(customJsonSchemas) + .addSemanticValidator(MetaDataValidators.cohortValidator()) + .addAllSemanticValidators(semanticValidators) + .build(); + } + }; + + // Same as in `configureSemanticValidators`, we rely on the correct pairing of `element` and `message`s + // to be validated. The code will explode if this assumption is invalid. 
+ //noinspection unchecked + return (ValidationWorkflowRunner) runner; + } + + private List prepareCustomSchemaUrls() { + LOGGER.debug("Preparing schemas for custom requirement validation"); + List urls = new ArrayList<>(); + for (Path requirement : requirements) { + try { + urls.add(requirement.toUri().toURL()); + } catch (MalformedURLException e) { + System.err.printf("Skipping JSON schema at '%s', the path is invalid: %s%n", requirement.toAbsolutePath(), e.getMessage()); + } + } + LOGGER.debug("Prepared {} custom schema(s)", urls.size()); + return urls; + } + + /** + * Prepare semantic validators for given {@link T}. + *

+ * Warning - it is important to request the {@link T} that is appropriate for the current {@link #element}. + * The app will crash and burn if e.g. {@link T} is {@link PhenopacketOrBuilder} while {@link #element} + * is {@link org.phenopackets.phenopackettools.util.format.PhenopacketElement#FAMILY}. + */ + private List> configureSemanticValidators() { + // Right now we only have one semantic validator, but we'll extend this in the future. + LOGGER.debug("Configuring semantic validators"); + List> validators = new ArrayList<>(); + if (hpJson != null) { + LOGGER.debug("Reading HPO from '{}}'", hpJson.toAbsolutePath()); + Ontology hpo = OntologyLoader.loadOntology(hpJson.toFile()); + + // The entire logic of this command stands and falls on correct state of `element` and the read message(s). + // This method requires an appropriate combination of `T` and `element`, as described in Javadoc. + // We suppress warning and perform an unchecked cast here, assuming `T` and `element` are appropriate. + // The app will crash and burn if this is not the case. 
+ PhenopacketValidator validator = switch (element) { + case PHENOPACKET -> //noinspection unchecked + (PhenopacketValidator) HpoPhenotypeValidators.phenopacketHpoPhenotypeValidator(hpo); + case FAMILY -> //noinspection unchecked + (PhenopacketValidator) HpoPhenotypeValidators.familyHpoPhenotypeValidator(hpo); + case COHORT -> //noinspection unchecked + (PhenopacketValidator) HpoPhenotypeValidators.cohortHpoPhenotypeValidator(hpo); + }; + validators.add(validator); + } + + LOGGER.debug("Configured {} semantic validator(s)", validators.size()); + return validators; + } + + + private static int writeValidationResults(ValidationResults results, OutputStream os) { + CSVFormat format = CSVFormat.DEFAULT.builder() + .setCommentMarker('#') + .build(); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + + try { + CSVPrinter printer = format.print(writer); + printHeader(results, printer); + printValidationResults(results, printer); + } catch (IOException e) { + LOGGER.error("Error while writing out the validation results: {}", e.getMessage(), e); + return 1; + } finally { + try { + writer.flush(); + os.flush(); + if (os != System.out) + os.close(); + } catch (IOException e) { + LOGGER.error("Error while flushing and closing the writer: {}", e.getMessage(), e); + } + } + + return 0; // We're done + } + + private static void printHeader(ValidationResults results, CSVPrinter printer) throws IOException { + // Print header + printer.printComment("phenopacket-tools validate %s".formatted(PHENOPACKET_TOOLS_VERSION)); + printer.printComment("date=%s".formatted(LocalDateTime.now())); + + // Print validators + for (ValidatorInfo validator : results.validators()) { + printer.printComment("validator_id=%s;validator_name=%s;description=%s".formatted(validator.validatorId(), validator.validatorName(), validator.description())); + } + } + + private static void printValidationResults(ValidationResults results, CSVPrinter printer) throws IOException { + // Header + 
printer.printRecord("LEVEL", "VALIDATOR_ID", "CATEGORY", "MESSAGE"); + // Validation results + for (ValidationResult result : results.validationResults()) { + printer.print(result.level()); + printer.print(result.validatorInfo().validatorId()); + printer.print(result.category()); + printer.print(result.message()); + printer.println(); + } + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties new file mode 100644 index 00000000..ab6a31fd --- /dev/null +++ b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/application.properties @@ -0,0 +1,2 @@ +# Maven filters this file and replaces the placeholder with the actual version. +phenopacket-tools.version=@project.version@ diff --git a/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt new file mode 100644 index 00000000..02d429c8 --- /dev/null +++ b/phenopacket-tools-cli/src/main/resources/org/phenopackets/phenopackettools/banner.txt @@ -0,0 +1,7 @@ + __ __ __ __ __ + ___ / / ___ ___ ___ ___ ___ _____/ /_____ / /_ / /____ ___ / /__ + / _ \/ _ \/ -_) _ \/ _ \/ _ \/ _ `/ __/ '_/ -_) __/ / __/ _ \/ _ \/ (_-< + / .__/_//_/\__/_//_/\___/ .__/\_,_/\__/_/\_\\__/\__/ \__/\___/\___/_/___/ +/_/ /_/ + + v@project.version@ \ No newline at end of file diff --git a/pom.xml b/pom.xml index 36531824..3ce0f321 100644 --- a/pom.xml +++ b/pom.xml @@ -187,6 +187,11 @@ phenol-io ${phenol.version} + + org.apache.commons + commons-csv + 1.9.0 + info.picocli picocli From d3ab396949f04c997ab8cc2b30b81884610e43af Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Oct 2022 13:59:55 -0400 Subject: [PATCH 10/20] Print banner in `examples` command. 
Signed-off-by: Daniel Danis --- .../phenopackettools/command/ExamplesCommand.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java index 03250a2e..e46b69e3 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ExamplesCommand.java @@ -26,7 +26,7 @@ mixinStandardHelpOptions = true, sortOptions = false, description = "Write example phenopackets to a directory.") -public class ExamplesCommand implements Callable { +public class ExamplesCommand extends BaseCommand { @CommandLine.Option(names = {"-o", "--output"}, description = "Output directory (default: ${DEFAULT-VALUE})") @@ -35,6 +35,8 @@ public class ExamplesCommand implements Callable { @Override public Integer call() throws Exception { + printBanner(); + Path phenopacketDir = createADirectoryIfDoesNotExist(output.resolve("phenopackets")); Path familyDir = createADirectoryIfDoesNotExist(output.resolve("families")); Path cohortDir = createADirectoryIfDoesNotExist(output.resolve("cohorts")); From 91459f764c02407bb4b5c862901e05cbbe4f4287 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Oct 2022 17:57:16 -0400 Subject: [PATCH 11/20] Support multiple inputs as well as STDIN for `validate` and `convert`. 
Signed-off-by: Daniel Danis --- ...emInputCommand.java => BaseIOCommand.java} | 112 ++++++------ .../command/ConvertCommand.java | 164 ++++++++++++++++-- .../command/SingleItemIOCommand.java | 46 ----- .../command/ValidateCommand.java | 81 ++------- .../writer/CSVValidationResultsWriter.java | 91 ++++++++++ .../util/format/PhenopacketFormat.java | 11 ++ .../src/main/java/module-info.java | 1 + .../core/writer/ValidationResultsAndPath.java | 14 ++ .../core/writer/ValidationResultsWriter.java | 24 +++ 9 files changed, 365 insertions(+), 179 deletions(-) rename phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/{SingleItemInputCommand.java => BaseIOCommand.java} (61%) delete mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java create mode 100644 phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsAndPath.java create mode 100644 phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsWriter.java diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java similarity index 61% rename from phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java rename to phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java index a7a440c9..42bf9c93 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemInputCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java @@ -16,18 +16,21 @@ import 
java.io.*; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; /** * A command that provides routines for reading as well as {@link PhenopacketFormat}s and {@link PhenopacketElement}s * for processing of a single top-level Phenopacket schema element. */ -public abstract class SingleItemInputCommand extends BaseCommand { +public abstract class BaseIOCommand extends BaseCommand { - private static final Logger LOGGER = LoggerFactory.getLogger(SingleItemInputCommand.class); + private static final Logger LOGGER = LoggerFactory.getLogger(BaseIOCommand.class); @CommandLine.Option(names = {"-i", "--input"}, - description = "Input phenopacket.%nLeave empty for STDIN") - public Path input = null; + arity = "0..*", + description = "Input phenopacket(s).%nLeave empty for STDIN") + public List inputs = null; // The format will be sniffed if it is uninitialized. @CommandLine.Option(names = {"-f", "--format"}, @@ -45,46 +48,60 @@ public abstract class SingleItemInputCommand extends BaseCommand { *

* Note that the function does not return if reading fails. */ - protected Message readMessageOrExit(PhenopacketSchemaVersion schemaVersion) { - try { - return readInputMessage(schemaVersion); - } catch (FormatSniffException e) { - System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); - System.exit(1); - } catch (IOException e) { - System.err.println("Unable to read input file, " + e.getMessage() + "\nPlease check the format of file " + input.toAbsolutePath()); - System.exit(1); + protected List readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) { + if (inputs == null) { + // Assuming a single input is coming from STDIN + InputStream is = System.in; + try { + setFormatAndElement(is); + return List.of(new MessageAndPath(parseMessage(schemaVersion, is), null)); + } catch (FormatSniffException e) { + System.err.println("Unable to detect input format from STDIN.\nConsider using the `--format` option."); + System.exit(1); + } catch (IOException e) { + System.err.println("Unable to read STDIN: " + e.getMessage() + "\nPlease check the input format."); + System.exit(1); + } + } else { + // Assuming a one or more input are provided via `-i | --input`. + + // Picocli should ensure that `input` is never an empty list. `input` is `null` if no `-i` was supplied. 
+ assert !inputs.isEmpty(); + + List messages = new ArrayList<>(); + for (Path input : inputs) { + try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) { + setFormatAndElement(is); + Message message = parseMessage(schemaVersion, is); + messages.add(new MessageAndPath(message, input)); + } catch (FormatSniffException e) { + System.err.printf("Unable to detect input format of %s.\nConsider using the `--format` option.%n", input.toAbsolutePath()); + System.exit(1); + } catch (IOException e) { + System.err.printf("Unable to read input file %s: %s\nPlease check the input format.%n", input.toAbsolutePath(), e.getMessage()); + System.exit(1); + } + } + return messages; } - return null; // Cannot happen but to make the compiler happy.. + return null; // Cannot happen but to make the compiler happy... } - /** - * Read the input {@link Message} either from the standard input or from the provided {@link #input}. - *

- * The method uses {@link #format} and {@link #element} to decode the input. In absence of the {@link #format}, - * we make an educated guess (sniff) and throw a {@link FormatSniffException} if the sniffing fails. - * - * @return the parsed {@link Message}. - * @throws FormatSniffException if the format sniffing fails. - * @throws IOException in case of I/O errors. - */ - private Message readInputMessage(PhenopacketSchemaVersion schemaVersion) throws FormatSniffException, IOException { - InputStream is = null; - try { - is = openInput(); - if (format == null) - // Remember the provided or sniffed input format. - format = parseFormat(is); - - if (element == null) { - LOGGER.info("Input element type was not provided, assuming phenopacket.. "); - element = PhenopacketElement.PHENOPACKET; - } + private void setFormatAndElement(InputStream is) throws IOException, FormatSniffException { + PhenopacketFormat sniffed = parseFormat(is); + if (format == null) { + format = sniffed; + } else { + if (!format.equals(sniffed)) + // This can happen e.g. if processing multiple files at once but one turns out to be a different format. + // We emit warning because this is likely not what the user intended and the code will likely explode + // further downstream. 
+ LOGGER.warn("Input format is set to {} but the current input looks like {}", format, sniffed); + } - return parseMessage(schemaVersion, is); - } finally { - if (is != null && is != System.in) - is.close(); + if (element == null) { + LOGGER.info("Input element type (-e | --element) was not provided, assuming phenopacket.."); + element = PhenopacketElement.PHENOPACKET; } } @@ -137,19 +154,6 @@ private static Message.Builder prepareBuilder(PhenopacketSchemaVersion schemaVer }; } - private InputStream openInput() throws IOException { - if (input == null) { - return System.in; - } else { - if (!Files.isRegularFile(input)) { - System.err.printf("The input file %s does not exist!%n", input.toAbsolutePath()); - System.exit(1); - } - LOGGER.info("Reading input from {}", input.toAbsolutePath()); - return new BufferedInputStream(Files.newInputStream(input)); - } - } - private PhenopacketFormat parseFormat(InputStream is) throws IOException, FormatSniffException { if (format == null) { LOGGER.info("Input format was not provided, making an educated guess.."); @@ -160,6 +164,8 @@ private PhenopacketFormat parseFormat(InputStream is) throws IOException, Format return format; } + protected record MessageAndPath(Message message, Path path) {} + protected enum PhenopacketSchemaVersion { V1, V2; diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index 6dfe2334..49cbb1e7 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -1,6 +1,7 @@ package org.phenopackets.phenopackettools.command; import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; import org.phenopackets.phenopackettools.converter.converters.V1ToV2Converter; import 
org.phenopackets.phenopackettools.util.format.PhenopacketFormat; import org.phenopackets.schema.v1.*; @@ -9,6 +10,12 @@ import picocli.CommandLine.Command; import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static picocli.CommandLine.Option; @@ -17,14 +24,22 @@ sortOptions = false, description = "Convert a v1.0 phenopacket to a v2.0 phenopacket.", footer = "Beware this process could be lossy!") -public class ConvertCommand extends SingleItemIOCommand { +public class ConvertCommand extends BaseIOCommand { + /** + * A pattern to match file prefix + */ + private static final Pattern PATTERN = Pattern.compile("^(?.*)\\.((pb)|(json)|(yaml))$"); private static final Logger LOGGER = LoggerFactory.getLogger(ConvertCommand.class); @Option(names = {"-o", "--output-format"}, description = "Output format.%nDefault: input format") public PhenopacketFormat outputFormat = null; + @Option(names = {"-O", "--output-directory"}, + description = "Path to output directory") + public Path outputDirectory = null; + @Option(names = {"--convert-variants"}, description = "Convert variant data.%nDefault: ${DEFAULT-VALUE}") public boolean convertVariants = false; @@ -35,35 +50,148 @@ public Integer call() { // (0) Print banner. printBanner(); - // (1) Read the input v1 message. - Message message = readMessageOrExit(PhenopacketSchemaVersion.V1); + if (!checkInputArgumentsAreOk()) + return 1; + + // (1) Read the input v1 message(s). + List messages = readMessagesOrExit(PhenopacketSchemaVersion.V1); - // (2) Convert into v2 format + // (2) Convert into v2 format. 
if (convertVariants) LOGGER.debug("Converting variants"); V1ToV2Converter converter = V1ToV2Converter.of(convertVariants); - Message v2 = switch (element) { - case PHENOPACKET -> converter.convertPhenopacket((Phenopacket) message); - case FAMILY -> converter.convertFamily((Family) message); - case COHORT -> converter.convertCohort((Cohort) message); - }; - - // (3) Write v2 into STDOUT using either the input format or the selected output format. - OutputStream alwaysTheStandardOutput = System.out; + List converted = new ArrayList<>(messages.size()); + for (MessageAndPath mp : messages) { + Message message = mp.message(); + Message v2 = switch (element) { + case PHENOPACKET -> converter.convertPhenopacket((Phenopacket) message); + case FAMILY -> converter.convertFamily((Family) message); + case COHORT -> converter.convertCohort((Cohort) message); + }; + converted.add(new MessageAndPath(v2, mp.path())); + } + + // (3) Set the output format if necessary. if (outputFormat == null) { LOGGER.info("Output format (-o | --output-format) not provided, writing data in the input format `{}`", format); outputFormat = format; } - try { - writeMessage(v2, outputFormat, alwaysTheStandardOutput); - } catch (IOException e) { - System.err.println("Could not write phenopacket: " + e.getMessage()); - return 1; + + // (4) Write out the output(s). + return writeOutConverted(converted); + } + + /** + * Return {@code true} if CLI argument combination makes sense or {@code false} if the app should abort. + */ + private boolean checkInputArgumentsAreOk() { + if (inputs == null) { + if (outputDirectory != null) + LOGGER.warn("Output directory was provided but the input is coming from STDIN. The output will be written to STDOUT"); + } else { + if (inputs.isEmpty()) { + throw new RuntimeException("Input list should never be empty!"); // A bug guard. 
+ } else { + if (inputs.size() > 1) { + if (outputDirectory == null) { + LOGGER.error("Output directory (-O | --output-directory) must be provided when processing >1 inputs"); + return false; + } else if (!Files.isDirectory(outputDirectory)) { + LOGGER.error("The `-O | --output-directory` argument {} is not a directory", outputDirectory.toAbsolutePath()); + return false; + } + } + } } + return true; + } - // We're done! + private int writeOutConverted(List converted) { + if (converted.size() == 1) { + // Writing out item, either from STDIN or from one `-i` options. + MessageAndPath mp = converted.get(0); + OutputStream os = null; + try { + // the input must have come from STDIN + if (mp.path() == null || outputDirectory == null) { + os = System.out; + } else { + os = openOutputStream(mp.path()); + } + writeMessage(mp.message(), outputFormat, os); + } catch (IOException e) { + LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); + return 1; + } finally { + if (os != null && os != System.out) { + try { + os.close(); + } catch (IOException e) { + LOGGER.warn("Error occurred while closing the output"); + } + } + } + } else { + // Writing out >1 items provided by `-i` options. 
+ for (MessageAndPath mp : converted) { + try (OutputStream os = openOutputStream(mp.path())) { + writeMessage(mp.message(), outputFormat, os); + } catch (IOException e) { + LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); + return 1; + } + } + } return 0; } + private BufferedOutputStream openOutputStream(Path inputPath) throws IOException { + // remove suffix, add `v2` and add + String fileName = inputPath.toFile().getName(); + Matcher matcher = PATTERN.matcher(fileName); + + String suffix = ".v2" + outputFormat.suffix(); + Path output; + if (matcher.matches()) { + // Remove the prefix from the input file and create a new file + String prefix = matcher.group("prefix"); + output = outputDirectory.resolve(prefix + suffix); + } else { + // Just append the suffix. + output = outputDirectory.resolve(fileName + suffix); + } + LOGGER.debug("Input path: {}, output path: {}", inputPath.toAbsolutePath(), output.toAbsolutePath()); + + return new BufferedOutputStream(Files.newOutputStream(output)); + } + + /** + * Write the {@code message} in an appropriate {@code format} into the provided {@link OutputStream} {@code os}. + *

+ * Uses {@link } + * @param message message to be written out. + * @param format format to write out + * @param os where to write + * @throws IOException in case of I/O errors during the output + */ + protected static void writeMessage(Message message, PhenopacketFormat format, OutputStream os) throws IOException { + switch (format) { + case PROTOBUF -> { + LOGGER.debug("Writing protobuf message"); + message.writeTo(os); + } + case JSON -> { + LOGGER.debug("Writing JSON message"); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + JsonFormat.printer().appendTo(message, writer); + writer.flush(); + } + case YAML -> { + // TODO - implement + throw new RuntimeException("YAML printer is not yet implemented"); + } + } + } + } diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java deleted file mode 100644 index 354f05d2..00000000 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/SingleItemIOCommand.java +++ /dev/null @@ -1,46 +0,0 @@ -package org.phenopackets.phenopackettools.command; - -import com.google.protobuf.Message; -import com.google.protobuf.util.JsonFormat; -import org.phenopackets.phenopackettools.util.format.PhenopacketFormat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.*; - -/** - * {@link SingleItemIOCommand} adds a writing routine to {@link SingleItemInputCommand}. - */ -public abstract class SingleItemIOCommand extends SingleItemInputCommand { - - private static final Logger LOGGER = LoggerFactory.getLogger(SingleItemIOCommand.class); - - /** - * Write the {@code message} in an appropriate {@code format} into the provided {@link OutputStream} {@code os}. - *

- * Uses {@link } - * @param message message to be written out. - * @param format format to write out - * @param os where to write - * @throws IOException in case of I/O errors during the output - */ - protected static void writeMessage(Message message, PhenopacketFormat format, OutputStream os) throws IOException { - switch (format) { - case PROTOBUF -> { - LOGGER.debug("Writing protobuf message"); - message.writeTo(os); - } - case JSON -> { - LOGGER.debug("Writing JSON message"); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); - JsonFormat.printer().appendTo(message, writer); - writer.flush(); - } - case YAML -> { - // TODO - implement - throw new RuntimeException("YAML printer is not yet implemented"); - } - } - } - -} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java index 8fc55349..c1676b38 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java @@ -1,16 +1,15 @@ package org.phenopackets.phenopackettools.command; -import com.google.protobuf.Message; import com.google.protobuf.MessageOrBuilder; -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVPrinter; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.phenopackets.phenopackettools.validator.core.*; import org.phenopackets.phenopackettools.validator.core.metadata.MetaDataValidators; import org.phenopackets.phenopackettools.validator.core.phenotype.HpoPhenotypeValidators; +import org.phenopackets.phenopackettools.validator.core.writer.ValidationResultsAndPath; import org.phenopackets.phenopackettools.validator.jsonschema.JsonSchemaValidationWorkflowRunner; +import 
org.phenopackets.phenopackettools.writer.CSVValidationResultsWriter; import org.phenopackets.schema.v2.CohortOrBuilder; import org.phenopackets.schema.v2.FamilyOrBuilder; import org.phenopackets.schema.v2.PhenopacketOrBuilder; @@ -31,7 +30,7 @@ description = "Validate top-level elements of the Phenopacket schema.", sortOptions = false, mixinStandardHelpOptions = true) -public class ValidateCommand extends SingleItemInputCommand { +public class ValidateCommand extends BaseIOCommand { private static final Logger LOGGER = LoggerFactory.getLogger(ValidateCommand.class); @@ -49,18 +48,27 @@ public Integer call() { // (0) Print banner. printBanner(); - // (1) Read the input v2 message. - Message message = readMessageOrExit(PhenopacketSchemaVersion.V2); + // (1) Read the input v2 message(s). + List messages = readMessagesOrExit(PhenopacketSchemaVersion.V2); // (2) Set up the validator. ValidationWorkflowRunner runner = prepareWorkflowRunner(); // (3) Validate the message(s). - ValidationResults results = runner.validate(message); + List results = new ArrayList<>(messages.size()); + for (MessageAndPath mp : messages) { + results.add(new ValidationResultsAndPath(runner.validate(mp.message()), mp.path())); + } - // (4) Write out the validation results. - PrintStream printToStdoutSoFar = System.out; - return writeValidationResults(results, printToStdoutSoFar); + // (4) Write out the validation results into STDOUT. + try { + CSVValidationResultsWriter writer = new CSVValidationResultsWriter(System.out, PHENOPACKET_TOOLS_VERSION, LocalDateTime.now()); + writer.writeValidationResults(runner.validators(), results); + return 0; + } catch (IOException e) { + LOGGER.error("Error while writing out results: {}", e.getMessage(), e); + return 1; + } } private ValidationWorkflowRunner prepareWorkflowRunner() { @@ -119,7 +127,7 @@ private List prepareCustomSchemaUrls() { * The app will crash and burn if e.g. 
{@link T} is {@link PhenopacketOrBuilder} while {@link #element} * is {@link org.phenopackets.phenopackettools.util.format.PhenopacketElement#FAMILY}. */ - private List> configureSemanticValidators() { + private List> configureSemanticValidators() { // Right now we only have one semantic validator, but we'll extend this in the future. LOGGER.debug("Configuring semantic validators"); List> validators = new ArrayList<>(); @@ -146,55 +154,4 @@ private List> configureSema return validators; } - - private static int writeValidationResults(ValidationResults results, OutputStream os) { - CSVFormat format = CSVFormat.DEFAULT.builder() - .setCommentMarker('#') - .build(); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); - - try { - CSVPrinter printer = format.print(writer); - printHeader(results, printer); - printValidationResults(results, printer); - } catch (IOException e) { - LOGGER.error("Error while writing out the validation results: {}", e.getMessage(), e); - return 1; - } finally { - try { - writer.flush(); - os.flush(); - if (os != System.out) - os.close(); - } catch (IOException e) { - LOGGER.error("Error while flushing and closing the writer: {}", e.getMessage(), e); - } - } - - return 0; // We're done - } - - private static void printHeader(ValidationResults results, CSVPrinter printer) throws IOException { - // Print header - printer.printComment("phenopacket-tools validate %s".formatted(PHENOPACKET_TOOLS_VERSION)); - printer.printComment("date=%s".formatted(LocalDateTime.now())); - - // Print validators - for (ValidatorInfo validator : results.validators()) { - printer.printComment("validator_id=%s;validator_name=%s;description=%s".formatted(validator.validatorId(), validator.validatorName(), validator.description())); - } - } - - private static void printValidationResults(ValidationResults results, CSVPrinter printer) throws IOException { - // Header - printer.printRecord("LEVEL", "VALIDATOR_ID", "CATEGORY", "MESSAGE"); - // 
Validation results - for (ValidationResult result : results.validationResults()) { - printer.print(result.level()); - printer.print(result.validatorInfo().validatorId()); - printer.print(result.category()); - printer.print(result.message()); - printer.println(); - } - } } \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java new file mode 100644 index 00000000..1040a24a --- /dev/null +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/writer/CSVValidationResultsWriter.java @@ -0,0 +1,91 @@ +package org.phenopackets.phenopackettools.writer; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.phenopackets.phenopackettools.validator.core.ValidationResult; +import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; +import org.phenopackets.phenopackettools.validator.core.writer.ValidationResultsAndPath; +import org.phenopackets.phenopackettools.validator.core.writer.ValidationResultsWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.time.LocalDateTime; +import java.util.List; + +/** + * Write {@link org.phenopackets.phenopackettools.validator.core.ValidationResults} into provided {@link OutputStream} + * in CSV format, including comments with validation metadata. + */ +public class CSVValidationResultsWriter implements ValidationResultsWriter { + + private static final Logger LOGGER = LoggerFactory.getLogger(CSVValidationResultsWriter.class); + + private final OutputStream os; + private final String phenopacketToolsVersion; + private final LocalDateTime dateTime; + + /** + * Create the writer using a given {@link OutputStream}. 
Note that the {@link OutputStream} is not closed. + * + * @param os where to write to + * @param phenopacketToolsVersion phenopacket tools version + * @param dateTime + */ + public CSVValidationResultsWriter(OutputStream os, String phenopacketToolsVersion, LocalDateTime dateTime) { + this.os = os; + this.phenopacketToolsVersion = phenopacketToolsVersion; + this.dateTime = dateTime; + } + + @Override + public void writeValidationResults(List validators, List results) throws IOException { + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + + try { + CSVPrinter printer = CSVFormat.DEFAULT.builder() + .setCommentMarker('#') + .build() + .print(writer); + printHeader(validators, printer); + printValidationResults(results, printer); + } finally { + try { + writer.flush(); + } catch (IOException e) { + LOGGER.warn("Error during flushing the writer: {}", e.getMessage(), e); + } + } + } + + private void printHeader(List results, CSVPrinter printer) throws IOException { + // Print header + printer.printComment("phenopacket-tools validate %s".formatted(phenopacketToolsVersion)); + printer.printComment("date=%s".formatted(dateTime)); + + // Print validators + for (ValidatorInfo validator : results) { + printer.printComment("validator_id=%s;validator_name=%s;description=%s".formatted(validator.validatorId(), validator.validatorName(), validator.description())); + } + } + + private static void printValidationResults(List results, CSVPrinter printer) throws IOException { + // Header + printer.printRecord("PATH", "LEVEL", "VALIDATOR_ID", "CATEGORY", "MESSAGE"); + // Validation results + for (ValidationResultsAndPath rp : results) { + String path = rp.path() == null ? 
"-" : rp.path().toAbsolutePath().toString(); + for (ValidationResult result : rp.results().validationResults()) { + printer.print(path); + printer.print(result.level()); + printer.print(result.validatorInfo().validatorId()); + printer.print(result.category()); + printer.print(result.message()); + printer.println(); + } + } + } +} diff --git a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java index e34f4497..d9020991 100644 --- a/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java +++ b/phenopacket-tools-util/src/main/java/org/phenopackets/phenopackettools/util/format/PhenopacketFormat.java @@ -12,6 +12,17 @@ public enum PhenopacketFormat { JSON, YAML; + /** + * Get file name suffix for the given {@link PhenopacketFormat} (e.g. {@code .json} for JSON). + */ + public String suffix() { + return switch (this) { + case PROTOBUF -> ".pb"; + case JSON -> ".json"; + case YAML -> ".yaml"; + }; + } + @Override public String toString() { return switch (this) { diff --git a/phenopacket-tools-validator-core/src/main/java/module-info.java b/phenopacket-tools-validator-core/src/main/java/module-info.java index 73856389..1abaea74 100644 --- a/phenopacket-tools-validator-core/src/main/java/module-info.java +++ b/phenopacket-tools-validator-core/src/main/java/module-info.java @@ -4,6 +4,7 @@ exports org.phenopackets.phenopackettools.validator.core.except; exports org.phenopackets.phenopackettools.validator.core.metadata; exports org.phenopackets.phenopackettools.validator.core.phenotype; + exports org.phenopackets.phenopackettools.validator.core.writer; requires org.monarchinitiative.phenol.core; requires org.phenopackets.schema; diff --git 
a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsAndPath.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsAndPath.java new file mode 100644 index 00000000..d8f07614 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsAndPath.java @@ -0,0 +1,14 @@ +package org.phenopackets.phenopackettools.validator.core.writer; + +import org.phenopackets.phenopackettools.validator.core.ValidationResults; + +import java.nio.file.Path; + +/** + * A record to use for writing {@link ValidationResults} by {@link ValidationResultsWriter}. + * + * @param results validation results to be written. + * @param path source of the input data or {@code null} if the input was received from standard input. + */ +public record ValidationResultsAndPath(ValidationResults results, Path path) { +} diff --git a/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsWriter.java b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsWriter.java new file mode 100644 index 00000000..e7c7a9a6 --- /dev/null +++ b/phenopacket-tools-validator-core/src/main/java/org/phenopackets/phenopackettools/validator/core/writer/ValidationResultsWriter.java @@ -0,0 +1,24 @@ +package org.phenopackets.phenopackettools.validator.core.writer; + +import org.phenopackets.phenopackettools.validator.core.ValidatorInfo; + +import java.io.IOException; +import java.util.List; + +/** + * Write out validation results obtained from validation of a top-level Phenopacket schema element. + */ +public interface ValidationResultsWriter { + + /** + * Write out the provided {@code validators} and {@code results}. 
+ * + * @param validators a list with {@link ValidatorInfo} describing {@link org.phenopackets.phenopackettools.validator.core.PhenopacketValidator} + * used to validate the top-level element. + * @param results a list with {@link ValidationResultsAndPath} received from the validator. + * @throws IOException in case of IO errors, of course. + */ + void writeValidationResults(List validators, + List results) throws IOException; + +} From b278157fa25580ef82230528095c8b56a51e3ec7 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Oct 2022 21:52:41 -0400 Subject: [PATCH 12/20] Group CLI arguments into sections. Signed-off-by: Daniel Danis --- .../command/BaseIOCommand.java | 67 ++++++++++--------- .../command/ConvertCommand.java | 67 ++++++++++--------- .../command/ValidateCommand.java | 39 ++++++----- 3 files changed, 96 insertions(+), 77 deletions(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java index 42bf9c93..183f2013 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/BaseIOCommand.java @@ -27,29 +27,36 @@ public abstract class BaseIOCommand extends BaseCommand { private static final Logger LOGGER = LoggerFactory.getLogger(BaseIOCommand.class); - @CommandLine.Option(names = {"-i", "--input"}, - arity = "0..*", - description = "Input phenopacket(s).%nLeave empty for STDIN") - public List inputs = null; - - // The format will be sniffed if it is uninitialized. - @CommandLine.Option(names = {"-f", "--format"}, - description = "Phenopacket format.%nChoose from: {${COMPLETION-CANDIDATES}}") - public PhenopacketFormat format = null; - - // TODO - is it too hard to implement element sniffing? 
- @CommandLine.Option(names = {"-e", "--element"}, - description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") - public PhenopacketElement element = null; + @CommandLine.ArgGroup(validate = false, heading = "Inputs:%s") + public InputSection inputSection = new InputSection(); + + public static class InputSection { + @CommandLine.Option(names = {"-i", "--input"}, + arity = "0..*", + description = "Input phenopacket(s).%nLeave empty for STDIN") + public List inputs = null; + + // The format will be sniffed if it is uninitialized. + @CommandLine.Option(names = {"-f", "--format"}, + description = "Phenopacket format.%nChoose from: {${COMPLETION-CANDIDATES}}") + public PhenopacketFormat format = null; + + // TODO - is it too hard to implement element sniffing? + @CommandLine.Option(names = {"-e", "--element"}, + description = "Top-level element.%nChoose from {${COMPLETION-CANDIDATES}}%nDefault: phenopacket") + public PhenopacketElement element = null; + } /** * Attempt to read the input in the provided {@code schemaVersion} and exit upon any failure. As a side effect, - * {@link #format} and {@link #element} fields are set after the function returns. + * {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#format} + * and {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element} + * fields are set after the function returns. *

* Note that the function does not return if reading fails. */ protected List readMessagesOrExit(PhenopacketSchemaVersion schemaVersion) { - if (inputs == null) { + if (inputSection.inputs == null) { // Assuming a single input is coming from STDIN InputStream is = System.in; try { @@ -66,10 +73,10 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem // Assuming a one or more input are provided via `-i | --input`. // Picocli should ensure that `input` is never an empty list. `input` is `null` if no `-i` was supplied. - assert !inputs.isEmpty(); + assert !inputSection.inputs.isEmpty(); List messages = new ArrayList<>(); - for (Path input : inputs) { + for (Path input : inputSection.inputs) { try (InputStream is = new BufferedInputStream(Files.newInputStream(input))) { setFormatAndElement(is); Message message = parseMessage(schemaVersion, is); @@ -89,24 +96,24 @@ protected List readMessagesOrExit(PhenopacketSchemaVersion schem private void setFormatAndElement(InputStream is) throws IOException, FormatSniffException { PhenopacketFormat sniffed = parseFormat(is); - if (format == null) { - format = sniffed; + if (inputSection.format == null) { + inputSection.format = sniffed; } else { - if (!format.equals(sniffed)) + if (!inputSection.format.equals(sniffed)) // This can happen e.g. if processing multiple files at once but one turns out to be a different format. // We emit warning because this is likely not what the user intended and the code will likely explode // further downstream. 
- LOGGER.warn("Input format is set to {} but the current input looks like {}", format, sniffed); + LOGGER.warn("Input format is set to {} but the current input looks like {}", inputSection.format, sniffed); } - if (element == null) { + if (inputSection.element == null) { LOGGER.info("Input element type (-e | --element) was not provided, assuming phenopacket.."); - element = PhenopacketElement.PHENOPACKET; + inputSection.element = PhenopacketElement.PHENOPACKET; } } private Message parseMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { - return switch (format) { + return switch (inputSection.format) { case PROTOBUF -> readProtobufMessage(schemaVersion, is); case JSON -> readJsonMessage(schemaVersion, is); // TODO - implement YAML parsing @@ -117,12 +124,12 @@ private Message parseMessage(PhenopacketSchemaVersion schemaVersion, InputStream private Message readProtobufMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { LOGGER.debug("Reading protobuf message"); return switch (schemaVersion) { - case V1 -> switch (element) { + case V1 -> switch (inputSection.element) { case PHENOPACKET -> Phenopacket.parseFrom(is); case FAMILY -> Family.parseFrom(is); case COHORT -> Cohort.parseFrom(is); }; - case V2 -> switch (element) { + case V2 -> switch (inputSection.element) { case PHENOPACKET -> org.phenopackets.schema.v2.Phenopacket.parseFrom(is); case FAMILY -> org.phenopackets.schema.v2.Family.parseFrom(is); @@ -134,7 +141,7 @@ private Message readProtobufMessage(PhenopacketSchemaVersion schemaVersion, Inpu private Message readJsonMessage(PhenopacketSchemaVersion schemaVersion, InputStream is) throws IOException { LOGGER.debug("Reading JSON message"); BufferedReader reader = new BufferedReader(new InputStreamReader(is)); - Message.Builder builder = prepareBuilder(schemaVersion, element); + Message.Builder builder = prepareBuilder(schemaVersion, inputSection.element); JsonFormat.parser().merge(reader, builder); 
return builder.build(); } @@ -155,13 +162,13 @@ private static Message.Builder prepareBuilder(PhenopacketSchemaVersion schemaVer } private PhenopacketFormat parseFormat(InputStream is) throws IOException, FormatSniffException { - if (format == null) { + if (inputSection.format == null) { LOGGER.info("Input format was not provided, making an educated guess.."); PhenopacketFormat fmt = FormatSniffer.sniff(is); LOGGER.info("The input looks like a {} file", fmt); return fmt; } - return format; + return inputSection.format; } protected record MessageAndPath(Message message, Path path) {} diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index 49cbb1e7..2f3c8be5 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -7,6 +7,7 @@ import org.phenopackets.schema.v1.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import picocli.CommandLine; import picocli.CommandLine.Command; import java.io.*; @@ -23,27 +24,31 @@ mixinStandardHelpOptions = true, sortOptions = false, description = "Convert a v1.0 phenopacket to a v2.0 phenopacket.", - footer = "Beware this process could be lossy!") + footer = "%nBeware, the conversion can be lossy!") public class ConvertCommand extends BaseIOCommand { + private static final Logger LOGGER = LoggerFactory.getLogger(ConvertCommand.class); /** - * A pattern to match file prefix + * A pattern to match the input file prefix. 
*/ private static final Pattern PATTERN = Pattern.compile("^(?.*)\\.((pb)|(json)|(yaml))$"); - private static final Logger LOGGER = LoggerFactory.getLogger(ConvertCommand.class); - @Option(names = {"-o", "--output-format"}, - description = "Output format.%nDefault: input format") - public PhenopacketFormat outputFormat = null; + @CommandLine.ArgGroup(validate = false, heading = "Convert section:%n") + public ConvertSection convertSection = new ConvertSection(); - @Option(names = {"-O", "--output-directory"}, - description = "Path to output directory") - public Path outputDirectory = null; + public static class ConvertSection { + @Option(names = {"-o", "--output-format"}, + description = "Output format.%nDefault: input format") + public PhenopacketFormat outputFormat = null; - @Option(names = {"--convert-variants"}, - description = "Convert variant data.%nDefault: ${DEFAULT-VALUE}") - public boolean convertVariants = false; + @Option(names = {"-O", "--output-directory"}, + description = "Path to output directory") + public Path outputDirectory = null; + @Option(names = {"--convert-variants"}, + description = "Convert variant data.%nDefault: ${DEFAULT-VALUE}") + public boolean convertVariants = false; + } @Override public Integer call() { @@ -57,14 +62,14 @@ public Integer call() { List messages = readMessagesOrExit(PhenopacketSchemaVersion.V1); // (2) Convert into v2 format. 
- if (convertVariants) + if (convertSection.convertVariants) LOGGER.debug("Converting variants"); - V1ToV2Converter converter = V1ToV2Converter.of(convertVariants); + V1ToV2Converter converter = V1ToV2Converter.of(convertSection.convertVariants); List converted = new ArrayList<>(messages.size()); for (MessageAndPath mp : messages) { Message message = mp.message(); - Message v2 = switch (element) { + Message v2 = switch (inputSection.element) { case PHENOPACKET -> converter.convertPhenopacket((Phenopacket) message); case FAMILY -> converter.convertFamily((Family) message); case COHORT -> converter.convertCohort((Cohort) message); @@ -73,9 +78,9 @@ public Integer call() { } // (3) Set the output format if necessary. - if (outputFormat == null) { - LOGGER.info("Output format (-o | --output-format) not provided, writing data in the input format `{}`", format); - outputFormat = format; + if (convertSection.outputFormat == null) { + LOGGER.info("Output format (-o | --output-format) not provided, writing data in the input format `{}`", inputSection.format); + convertSection.outputFormat = inputSection.format; } // (4) Write out the output(s). @@ -86,19 +91,19 @@ public Integer call() { * Return {@code true} if CLI argument combination makes sense or {@code false} if the app should abort. */ private boolean checkInputArgumentsAreOk() { - if (inputs == null) { - if (outputDirectory != null) + if (inputSection.inputs == null) { + if (convertSection.outputDirectory != null) LOGGER.warn("Output directory was provided but the input is coming from STDIN. The output will be written to STDOUT"); } else { - if (inputs.isEmpty()) { + if (inputSection.inputs.isEmpty()) { throw new RuntimeException("Input list should never be empty!"); // A bug guard. 
} else { - if (inputs.size() > 1) { - if (outputDirectory == null) { + if (inputSection.inputs.size() > 1) { + if (convertSection.outputDirectory == null) { LOGGER.error("Output directory (-O | --output-directory) must be provided when processing >1 inputs"); return false; - } else if (!Files.isDirectory(outputDirectory)) { - LOGGER.error("The `-O | --output-directory` argument {} is not a directory", outputDirectory.toAbsolutePath()); + } else if (!Files.isDirectory(convertSection.outputDirectory)) { + LOGGER.error("The `-O | --output-directory` argument {} is not a directory", convertSection.outputDirectory.toAbsolutePath()); return false; } } @@ -114,12 +119,12 @@ private int writeOutConverted(List converted) { OutputStream os = null; try { // the input must have come from STDIN - if (mp.path() == null || outputDirectory == null) { + if (mp.path() == null || convertSection.outputDirectory == null) { os = System.out; } else { os = openOutputStream(mp.path()); } - writeMessage(mp.message(), outputFormat, os); + writeMessage(mp.message(), convertSection.outputFormat, os); } catch (IOException e) { LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); return 1; @@ -136,7 +141,7 @@ private int writeOutConverted(List converted) { // Writing out >1 items provided by `-i` options. 
for (MessageAndPath mp : converted) { try (OutputStream os = openOutputStream(mp.path())) { - writeMessage(mp.message(), outputFormat, os); + writeMessage(mp.message(), convertSection.outputFormat, os); } catch (IOException e) { LOGGER.error("Error while writing out a phenopacket: {}", e.getMessage(), e); return 1; @@ -151,15 +156,15 @@ private BufferedOutputStream openOutputStream(Path inputPath) throws IOException String fileName = inputPath.toFile().getName(); Matcher matcher = PATTERN.matcher(fileName); - String suffix = ".v2" + outputFormat.suffix(); + String suffix = convertSection.outputFormat.suffix(); Path output; if (matcher.matches()) { // Remove the prefix from the input file and create a new file String prefix = matcher.group("prefix"); - output = outputDirectory.resolve(prefix + suffix); + output = convertSection.outputDirectory.resolve(prefix + suffix); } else { // Just append the suffix. - output = outputDirectory.resolve(fileName + suffix); + output = convertSection.outputDirectory.resolve(fileName + suffix); } LOGGER.debug("Input path: {}, output path: {}", inputPath.toAbsolutePath(), output.toAbsolutePath()); diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java index c1676b38..843ad62e 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ValidateCommand.java @@ -34,14 +34,19 @@ public class ValidateCommand extends BaseIOCommand { private static final Logger LOGGER = LoggerFactory.getLogger(ValidateCommand.class); - @CommandLine.Option(names = {"--require"}, - arity = "*", - description = "Path to JSON schema with additional requirements to enforce.") - protected List requirements = List.of(); - - @CommandLine.Option(names = "--hpo", - description = 
"Path to hp.json file") - protected Path hpJson; + @CommandLine.ArgGroup(validate = false, heading = "Validate section:%n") + public ValidateSection validateSection = new ValidateSection(); + + public static class ValidateSection { + @CommandLine.Option(names = {"--require"}, + arity = "*", + description = "Path to JSON schema with additional requirements to enforce.") + protected List requirements = List.of(); + + @CommandLine.Option(names = "--hpo", + description = "Path to hp.json file") + protected Path hpJson; + } @Override public Integer call() { @@ -73,7 +78,7 @@ public Integer call() { private ValidationWorkflowRunner prepareWorkflowRunner() { List customJsonSchemas = prepareCustomSchemaUrls(); - Object runner = switch (element) { + Object runner = switch (inputSection.element) { case PHENOPACKET -> { List> semanticValidators = configureSemanticValidators(); yield JsonSchemaValidationWorkflowRunner.phenopacketBuilder() @@ -109,7 +114,7 @@ private ValidationWorkflowRunner prepareWorkflowRunner() { private List prepareCustomSchemaUrls() { LOGGER.debug("Preparing schemas for custom requirement validation"); List urls = new ArrayList<>(); - for (Path requirement : requirements) { + for (Path requirement : validateSection.requirements) { try { urls.add(requirement.toUri().toURL()); } catch (MalformedURLException e) { @@ -123,23 +128,25 @@ private List prepareCustomSchemaUrls() { /** * Prepare semantic validators for given {@link T}. *

- * Warning - it is important to request the {@link T} that is appropriate for the current {@link #element}. - * The app will crash and burn if e.g. {@link T} is {@link PhenopacketOrBuilder} while {@link #element} + * Warning - it is important to request the {@link T} that is appropriate + * for the current {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element}. + * The app will crash and burn if e.g. {@link T} is {@link PhenopacketOrBuilder} + * while {@link org.phenopackets.phenopackettools.command.BaseIOCommand.InputSection#element} * is {@link org.phenopackets.phenopackettools.util.format.PhenopacketElement#FAMILY}. */ private List> configureSemanticValidators() { // Right now we only have one semantic validator, but we'll extend this in the future. LOGGER.debug("Configuring semantic validators"); List> validators = new ArrayList<>(); - if (hpJson != null) { - LOGGER.debug("Reading HPO from '{}}'", hpJson.toAbsolutePath()); - Ontology hpo = OntologyLoader.loadOntology(hpJson.toFile()); + if (validateSection.hpJson != null) { + LOGGER.debug("Reading HPO from '{}}'", validateSection.hpJson.toAbsolutePath()); + Ontology hpo = OntologyLoader.loadOntology(validateSection.hpJson.toFile()); // The entire logic of this command stands and falls on correct state of `element` and the read message(s). // This method requires an appropriate combination of `T` and `element`, as described in Javadoc. // We suppress warning and perform an unchecked cast here, assuming `T` and `element` are appropriate. // The app will crash and burn if this is not the case. 
- PhenopacketValidator validator = switch (element) { + PhenopacketValidator validator = switch (inputSection.element) { case PHENOPACKET -> //noinspection unchecked (PhenopacketValidator) HpoPhenotypeValidators.phenopacketHpoPhenotypeValidator(hpo); case FAMILY -> //noinspection unchecked From 4aeff6e89cb487003df54743003ea18731f73b8c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 13 Oct 2022 09:30:07 -0400 Subject: [PATCH 13/20] Work on documentation. Signed-off-by: Daniel Danis --- docs/cli.rst | 130 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 15 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index eb16f55c..a2200205 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -5,20 +5,20 @@ Command line interface (CLI) ============================ *phenopacket-tools* CLI provides functionality for viewing, conversion and validation -of the top-level elements of Phenopacket schema . This document describes how to set up the CLI application +of the top-level elements of Phenopacket schema. This document describes how to set up the CLI application on Linux, Mac and Windows environments. .. note:: *phenopacket-tools* is written in Java 17 and requires Java 17 or newer to run. -*phenopacket-tools* is distributed as a standalone executable Java Archive (JAR) file. Provided that Java 17 or better -is available in the environment, the application requires no special installation procedure. +*phenopacket-tools* is distributed as a standalone executable Java Archive (JAR) file. The application requires +no special installation procedure if Java 17 or better is available in your environment. Setup ~~~~~ -Most users should download the precompiled JAR file from *phenopacket-tools* release page. -However, it is also possible to build the JAR from sources. +Most users should *download* the precompiled JAR file from *phenopacket-tools* release page. +However, it is also possible to *build* the JAR from sources. 
Download ^^^^^^^^ @@ -55,25 +55,125 @@ from *phenopacket-tools* releases. Commands ~~~~~~~~ -This section describes the commands of *phenopacket-tools* CLI. +*phenopacket-tools* CLI provides the following commands: -``validate`` - validate semantic and syntactic correctness +* ``examples`` - generate examples of the top-level elements +* ``convert`` - convert top-level elements from *v1* to *v2* format +* ``validate`` - validate semantic and syntactic correctness of top-level Phenopacket schema elements + +The ``examples`` command is fairly simple; it writes a bunch of example phenopackets, cohorts and families +into the provided directory. The ``convert`` and ``validate`` commands, despite being a bit more elaborate, work in +a similar manner. The parts shared by both commands are described in greater detail +in the ``convert`` command section. + +In the next sections, we will run *phenopacket-tools* by using the following alias:: + + $ alias pxf="java -jar phenopacket-tools-cli-${project.version}.jar" + +*examples* - generate examples of the top-level elements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``examples`` command writes example phenopackets (including family and cohort examples) into +a provided base directory. Starting from a ``base`` directory, the examples are written into three sub-folders:: + + base + |- phenopackets + |- families + \- cohorts + +The ``examples`` command accepts an optional ``-o | --output`` argument. By default, the examples will be placed +into the current directory. + +The following command writes the examples into the ``path/to/examples`` directory:: + + $ pxf examples -o path/to/examples + + +*convert* - convert top-level elements from *v1* to *v2* format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``convert`` command converts a phenopacket, family, or a cohort from *v1* to *v2* format of Phenopacket schema.
+ +Usage +##### + +Let's assume we have an example phenopacket ``phenopacket.v1.json``, family ``family.v1.json``, +and cohort ``cohort.v1.json``. + +We can convert a *v1* phenopacket into *v2* by running:: + + $ cat phenopacket.v1.json | pxf convert > phenopacket.v2.json + + + +*phenopacket-tools* makes an educated guess to determine if the input is in *JSON*, *Protobuf*, or *YAML* format. +The guessing is, however, naive and can fail to parse e.g. a gzipped *JSON* file. Turn off the format guessing +by providing the ``-f | --format`` option:: + + $ # Explicit JSON input + $ cat phenopacket.v1.json | pxf convert -f json > phenopacket.v2.json + $ + $ # Explicit protobuf input + $ cat phenopacket.v1.pb | pxf convert -f protobuf > phenopacket.v2.pb + +The ``-f | --format`` option accepts one of the following 3 values: ``{json, pb, yaml}``. + + + +Unless set up otherwise, the output is written in the format of the input data. +However, we can override this by using ``-o | --output-format`` option:: + + $ cat phenopacket.v1.json | pxf convert -o pb > phenopacket.v2.pb + +The ``-o | --output-format`` option takes the same values as ``--format``: ``{json, pb, yaml}``. + + +The ``convert`` command expects to receive a phenopacket by default. However, it can also convert the other +top-level elements of the Phenopacket schema: family and cohort. Use the ``-e | --element`` option to indicate if +the input is a ``family`` or a ``cohort``:: + + $ cat family.v1.json | pxf convert -e family > family.v2.json + $ cat cohort.v1.json | pxf convert -e cohort > cohort.v2.json + +We can convert one or more items at a time by using the ``-i | --input`` option. If the ``-i`` option is used only once, +the STDIN is ignored and the conversion proceeds in the same way as in the examples above. However, the ``-i`` option can +be provided more than once, to convert a collection of items in a single run.
The results of the bulk processing +are written into a directory supplied via the ``-O | --output-directory`` option (the option is mandatory if using +>1 ``-i``). + +For instance:: + + $ pxf convert -i phenopacket.a.v1.json -i phenopacket.b.v1.json -O converted + +converts the input phenopackets and stores the results in the ``converted`` folder. The converted files will be stored +under the same names. + + +*validate* - validate semantic and syntactic correctness ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``validate`` command checks *syntactic* and *semantic* correctness of a *phenopacket*, *family*, or *cohort*. -Briefly, phenopacket is syntactically correct if it is well formatted (valid Protobuf message, JSON document, etc.), -meets the requirements of the *Phenopacket schema* (e.g. the REQUIRED attributes such as ``phenopacket.id`` and -``phenopacket.meta_data``, are set), and ``MetaData`` includes a ``Resource`` for all ontology concepts. +Briefly, to be syntactically correct, a phenopacket must be well formatted (valid Protobuf message, JSON document, etc.) +and meet the requirements of the Phenopacket schema; all REQUIRED attributes are set (e.g. ``phenopacket.id`` and +``phenopacket.meta_data``), and ``MetaData`` includes a ``Resource`` for all ontology concepts. +The *semantic* correctness ensures that the element, when taken as a whole, is ... TODO - finish -.. - TODO - check the validation description. +Usage +##### -``convert`` - convert from v1 to v2 format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``validate`` command shares many CLI options with ``convert``. -TODO - write the section +The same options are used to indicate the input formats and element types. The input can be provided through STDIN +as well as in bulk. The bulk processing makes sense especially if we e.g. load the HPO graph for each validation. + +Results are written into STDOUT in CSV/TSV format. 
The CSV output has a header, each header line starts with ``#`` character. +The header contains phenopacket-tools version, date time of validation, and list of validators that were run. +A row with column names follows the header, and then the individual validation results. + +.. + TODO - check the validation description. Set up autocompletion ~~~~~~~~~~~~~~~~~~~~~ From 844a0aad4c35ed209cf303a99aa7866fb1df4dc4 Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 13 Oct 2022 10:38:43 -0400 Subject: [PATCH 14/20] fixing vcf record JSON Schema ($ref) --- .../phenopackettools/validator/jsonschema/vrsatile.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json index 60470595..217883d1 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json @@ -110,9 +110,7 @@ "minItems": 0 }, "vcfRecord": { - "type": { - "$ref": "#/definitions/vcfRecord" - } + "$ref": "#/definitions/vcfRecord" }, "xrefs": { "type": "array", From e9783263a30e4e644deb1cc1824992684bbf2b1e Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 13 Oct 2022 10:39:14 -0400 Subject: [PATCH 15/20] Adding functions for unknown/not provided ACMG and actionability --- .../builder/builders/VariantInterpretationBuilder.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariantInterpretationBuilder.java b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariantInterpretationBuilder.java 
index 335f88e1..1ce60248 100644 --- a/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariantInterpretationBuilder.java +++ b/phenopacket-tools-builder/src/main/java/org/phenopackets/phenopackettools/builder/builders/VariantInterpretationBuilder.java @@ -37,6 +37,11 @@ public static VariantInterpretationBuilder builder(VariationDescriptorBuilder bu return new VariantInterpretationBuilder(builder.build()); } + public VariantInterpretationBuilder acmgNotProvided() { + builder.setAcmgPathogenicityClassification(AcmgPathogenicityClassification.NOT_PROVIDED); + return this; + } + public VariantInterpretationBuilder benign() { builder.setAcmgPathogenicityClassification(AcmgPathogenicityClassification.BENIGN); return this; @@ -62,6 +67,11 @@ public VariantInterpretationBuilder pathogenic() { return this; } + public VariantInterpretationBuilder actionabilityUnknown() { + builder.setTherapeuticActionability(TherapeuticActionability.UNKNOWN_ACTIONABILITY); + return this; + } + public VariantInterpretationBuilder notActionable() { builder.setTherapeuticActionability(TherapeuticActionability.NOT_ACTIONABLE); return this; From 78fd2e34addc96dffd96ab4a827b48668902240e Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 13 Oct 2022 10:39:28 -0400 Subject: [PATCH 16/20] Moving example to supplement --- .../jsonschema => supplementary}/hpo-rare-disease-schema.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema => supplementary}/hpo-rare-disease-schema.json (100%) diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/hpo-rare-disease-schema.json b/supplementary/hpo-rare-disease-schema.json similarity index 100% rename from 
phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/hpo-rare-disease-schema.json rename to supplementary/hpo-rare-disease-schema.json From e694f72af33a7535f1b0def2d10f0e28a9b75f45 Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 13 Oct 2022 10:39:47 -0400 Subject: [PATCH 17/20] removing -o option --- .../phenopackets/phenopackettools/command/ConvertCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java index 2f3c8be5..4b736bd5 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/command/ConvertCommand.java @@ -37,7 +37,7 @@ public class ConvertCommand extends BaseIOCommand { public ConvertSection convertSection = new ConvertSection(); public static class ConvertSection { - @Option(names = {"-o", "--output-format"}, + @Option(names = {"--output-format"}, description = "Output format.%nDefault: input format") public PhenopacketFormat outputFormat = null; From d8ee811db7a7871493bab2f9c03b309f350dccee Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 13 Oct 2022 10:40:51 -0400 Subject: [PATCH 18/20] don't print empty disease onset. 
Fix inexistant ACMG/Actionability conversion bug --- .../converters/V1ToV2ConverterImpl.java | 2 +- .../converters/v2/DiseaseConverter.java | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java index 5bbdd252..7d9b9cd5 100644 --- a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/V1ToV2ConverterImpl.java @@ -137,7 +137,7 @@ private static Interpretation toV2Interpretation(org.phenopackets.schema.v1.Phen for (var descriptor : descriptors) { GenomicInterpretationBuilder genomicInterpretation = GenomicInterpretationBuilder.builder(v1.getSubject().getId()) .causative() - .variantInterpretation(VariantInterpretationBuilder.builder(descriptor)); + .variantInterpretation(VariantInterpretationBuilder.builder(descriptor).acmgNotProvided().actionabilityUnknown()); diagnosis.addGenomicInterpretation(genomicInterpretation.build()); } diff --git a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/v2/DiseaseConverter.java b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/v2/DiseaseConverter.java index b802b218..868babec 100644 --- a/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/v2/DiseaseConverter.java +++ b/phenopacket-tools-converter/src/main/java/org/phenopackets/phenopackettools/converter/converters/v2/DiseaseConverter.java @@ -20,14 +20,23 @@ public static List toDiseases(List Date: Thu, 13 Oct 2022 10:41:03 -0400 Subject: [PATCH 19/20] documentation --- docs/cli.rst | 8 ++++---- 1 
file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index a2200205..ef383414 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -120,12 +120,12 @@ The ``-f | --format`` option accepts one of the following 3 values: ``{json, pb, -Unless set up otherwise, the output is written in the format of the input data. -However, we can override this by using ``-o | --output-format`` option:: +By default, the output is written in the format of the input data. +However, we can override this by using ``--output-format`` option:: - $ cat phenopacket.v1.json | pxf convert -o pb > phenopacket.v2.pb + $ cat phenopacket.v1.json | pxf convert --output-format pb > phenopacket.v2.pb -The ``-o | --output-format`` option takes the same values as ``--format``: ``{json, pb, yaml}``. +The ``--output-format`` option takes the same values as ``--format``: ``{json, pb, yaml}``. The ``convert`` command expects to receive a phenopacket by default. However, it can also convert the other From d3b4f03517ac16ca687435becd6e93061ba2a1ec Mon Sep 17 00:00:00 2001 From: pnrobinson Date: Thu, 13 Oct 2022 11:09:46 -0400 Subject: [PATCH 20/20] documentation update --- docs/conf.py | 1 + docs/index.rst | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 67b2b9e8..659473f8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,6 +43,7 @@ html_theme = 'alabaster' html_static_path = ['_static'] html_css_files = ['ptools.css'] +#html_style = 'ptools.css' # The name of the Pygments (syntax highlighting) style to use. diff --git a/docs/index.rst b/docs/index.rst index 90e1a288..a371bc17 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -37,7 +37,7 @@ how to use the CLI application on your system. TODO - review the three points and sync them with the manuscript. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 :caption: Contents: creating @@ -47,5 +47,8 @@ how to use the CLI application on your system. cli -.. 
image:: https://onlinelibrary.wiley.com/cms/asset/1cc0a141-da65-45a3-b7b0-6316b7b02069/ggn2202200016-fig-0002-m.jpg +.. figure:: https://onlinelibrary.wiley.com/cms/asset/1cc0a141-da65-45a3-b7b0-6316b7b02069/ggn2202200016-fig-0002-m.jpg :alt: GA4GH Phenopacket + :width: 800px + + Overview of the GA4GH Phenopacket Schema.