diff --git a/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json b/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json new file mode 100644 index 00000000..966d7b64 --- /dev/null +++ b/phenopacket-tools-cli/src/examples/phenopackets/retinoblastoma.json @@ -0,0 +1,498 @@ +{ + "id": "arbitrary.id", + "subject": { + "id": "proband A", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P6M" + } + }, + "sex": "FEMALE", + "karyotypicSex": "XX" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0030084", + "label": "Clinodactyly" + }, + "modifiers": [{ + "id": "HP:0012834", + "label": "Right" + }], + "onset": { + "age": { + "iso8601duration": "P3M" + } + } + }, { + "type": { + "id": "HP:0000555", + "label": "Leukocoria" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P4M" + } + } + }, { + "type": { + "id": "HP:0000486", + "label": "Strabismus" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P5M15D" + } + } + }, { + "type": { + "id": "HP:0000541", + "label": "Retinal detachment" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:79893-4", + "label": "Left eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 25.0, + "referenceRange": { + "unit": { + "id": "LOINC:56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }, { + "assay": { + "id": "LOINC:79892-6", + "label": "Right eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 15.0, + "referenceRange": { + "unit": { + 
"id": "LOINC:56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "biosamples": [{ + "id": "biosample.1", + "sampledTissue": { + "id": "UBERON:0000970", + "label": "eye" + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C35941", + "label": "Flexner-Wintersteiner Rosette Formation" + } + }, { + "type": { + "id": "NCIT:C132485", + "label": "Apoptosis and Necrosis" + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:33728-7", + "label": "Size.maximum dimension in Tumor" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm", + "label": "millimeter" + }, + "value": 15.0 + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P8M2W" + } + } + }], + "tumorProgression": { + "id": "NCIT:C8509", + "label": "Primary Neoplasm" + }, + "pathologicalTnmFinding": [{ + "id": "NCIT:C140720", + "label": "Retinoblastoma pT3 TNM Finding v8" + }, { + "id": "NCIT:C140711", + "label": "Retinoblastoma pN0 TNM Finding v8" + }], + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "files": [{ + "uri": "file://data/fileSomaticWgs.vcf.gz", + "individualToFileIdentifiers": { + "biosample.1": "specimen.1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }] + }], + "interpretations": [{ + "id": "interpretation.id", + "progressStatus": "SOLVED", + "diagnosis": { + "disease": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "proband A", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "cnv-1", + "moleculeContext": "genomic", + 
"variation": { + "copyNumber": { + "derivedSequenceExpression": { + "location": { + "sequenceId": "refseq:NC_000013.14", + "sequenceInterval": { + "startNumber": { + "value": "25981249" + }, + "endNumber": { + "value": "61706822" + } + } + } + }, + "number": { + "value": "1" + } + } + }, + "extensions": [{ + "name": "mosaicism", + "value": "40.0%" + }] + } + } + }, { + "subjectOrBiosampleId": "biosample.1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "rs121913300", + "variation": { + "allele": { + "sequenceLocation": { + "sequenceId": "refseq:NC_000013.11", + "sequenceInterval": { + "startNumber": { + "value": "48367511" + }, + "endNumber": { + "value": "48367512" + } + } + }, + "literalSequenceExpression": { + "sequence": "T" + } + } + }, + "label": "RB1 c.958C\u003eT (p.Arg320Ter)", + "geneContext": { + "valueId": "HGNC:9884", + "symbol": "RB1" + }, + "expressions": [{ + "syntax": "hgvs.c", + "value": "NM_000321.2:c.958C\u003eT" + }, { + "syntax": "transcript_reference", + "value": "NM_000321.2" + }], + "vcfRecord": { + "genomeAssembly": "GRCh38", + "chrom": "NC_000013.11", + "pos": "48367512", + "ref": "C", + "alt": "T" + }, + "extensions": [{ + "name": "allele-frequency", + "value": "25.0%" + }], + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "onset": { + "age": { + "iso8601duration": "P4M" + } + }, + "diseaseStage": [{ + "id": "LOINC:LA24739-7", + "label": "Group E" + }], + "clinicalTnmFinding": [{ + "id": "NCIT:C140678", + "label": "Retinoblastoma cM0 TNM Finding v8" + }], + "primarySite": { + "id": "UBERON:0004548", + "label": "left eye" + } + }], + "medicalActions": [{ + "treatment": { + "agent": { + "id": "DrugCentral:1678", + "label": 
"melphalan" + }, + "routeOfAdministration": { + "id": "NCIT:C38222", + "label": "Intraarterial Route of Administration" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "UCUM:mg.kg-1", + "label": "milligram per kilogram" + }, + "value": 0.4 + }, + "scheduleFrequency": { + "id": "NCIT:C64576", + "label": "Once" + }, + "interval": { + "start": "2020-09-02T00:00:00Z", + "end": "2020-09-02T00:00:00Z" + } + }] + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + }, + "adverseEvents": [{ + "id": "HP:0025637", + "label": "Vasospasm" + }], + "treatmentTerminationReason": { + "id": "NCIT:C41331", + "label": "Adverse Event" + } + }, { + "therapeuticRegimen": { + "ontologyClass": { + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" + }, + "startTime": { + "age": { + "iso8601duration": "P7M" + } + }, + "endTime": { + "age": { + "iso8601duration": "P8M" + } + }, + "regimenStatus": "COMPLETED" + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }, { + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }], + "files": [{ + "uri": "file://data/germlineWgs.vcf.gz", + "individualToFileIdentifiers": { + "proband A": "sample1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "2018-03-08", + 
"namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }, { + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "3.34.0", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2021-07-27", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2021-06-10", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }, { + "id": "loinc", + "name": "Logical Observation Identifiers Names and Codes", + "url": "https://loinc.org", + "version": "2.7.3", + "namespacePrefix": "LOINC", + "iriPrefix": "https://loinc.org/" + }, { + "id": "ucum", + "name": "Unified Code for Units of Measure", + "url": "https://ucum.org", + "version": "2.1", + "namespacePrefix": "UCUM", + "iriPrefix": "https://ucum.org/" + }, { + "id": "geno", + "name": "Genotype Ontology", + "url": "http://purl.obolibrary.org/obo/geno.owl", + "version": "2022-03-05", + "namespacePrefix": "GENO", + "iriPrefix": "http://purl.obolibrary.org/obo/GENO_" + }, { + "id": "drugcentral", + "name": "Drug Central", + "url": "https://drugcentral.org/", + "version": "08/22/2022", + "namespacePrefix": "DrugCentral", + "iriPrefix": "https://drugcentral.org/drugcard/" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file diff --git a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java 
b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java index 69a46d8a..0a28729d 100644 --- a/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java +++ b/phenopacket-tools-cli/src/main/java/org/phenopackets/phenopackettools/cli/examples/Retinoblastoma.java @@ -1,5 +1,7 @@ package org.phenopackets.phenopackettools.cli.examples; +import org.ga4gh.vrs.v1.*; +import org.ga4gh.vrs.v1.Number; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.*; import org.phenopackets.phenopackettools.builder.constants.Laterality; @@ -71,10 +73,10 @@ Interpretation interpretation() { * @return Genomic interpretation related to a somatic missense mutation in the RB1 gene. */ GenomicInterpretation somaticRb1Missense() { - AlleleBuilder abuilder = AlleleBuilder.builder(); - abuilder.sequenceId("refseq:NC_000013.11"); - abuilder.interbaseStartEnd( 48367511, 48367512); - abuilder.altAllele("T"); + AlleleBuilder abuilder = AlleleBuilder.builder() + .sequenceId("refseq:NC_000013.11") + .interbaseStartEnd( 48367511, 48367512) + .altAllele("T"); VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder("rs121913300") .variation(abuilder.buildVariation()) .genomic() @@ -100,15 +102,31 @@ GenomicInterpretation somaticRb1Missense() { GenomicInterpretation germlineRb1Deletion() { - CopyNumberBuilder abuilder = CopyNumberBuilder.builder(); + CopyNumber cnv = CopyNumber.newBuilder() + .setDerivedSequenceExpression(DerivedSequenceExpression.newBuilder() + .setLocation(SequenceLocation.newBuilder() + .setSequenceId("refseq:NC_000013.14") + .setSequenceInterval(SequenceInterval.newBuilder() + .setStartNumber(Number.newBuilder(). 
+ setValue(25981249) + .build()) + .setEndNumber(Number.newBuilder() + .setValue(61706822) + .build()) + .build()) + .build()) + .build()) + .setNumber(Number.newBuilder().setValue(1).build()) + .build(); //abuilder.copyNumberId("ga4gh:VCN.AFfJws1M4Lg8w1O3XknmHYc9TU2hHYpp"); // original coordinates in paper were given as 13q12.13q21.2(26,555,387–62,280,955 for hg19 //chr13 25981249 61706822 -- lifted over to hg38 + Variation variation = Variation.newBuilder() + .setCopyNumber(cnv) + .build(); - abuilder.alleleLocation("refseq:NC_000013.14",25981249, 61706822);//VRS uses inter-residue coordinates - abuilder.oneCopy(); VariationDescriptorBuilder vbuilder = VariationDescriptorBuilder.builder(); - vbuilder.variation(abuilder.buildVariation()); + vbuilder.variation(variation); vbuilder.mosaicism(40.0); VariantInterpretationBuilder vibuilder = VariantInterpretationBuilder.builder(vbuilder); vibuilder.pathogenic(); diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md new file mode 100644 index 00000000..c18f9455 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/README.md @@ -0,0 +1,25 @@ +# README + +This folder contains JSON schemas for validating top-level Phenopacket Schema elements and the `Variation` element +embedded in the Phenopacket Schema. 
+ +## VRSATILE notes + +The datatype of the `VcfRecord.pos` field in `vrsatile.proto` is: +``` +uint64 pos = 3; +``` + +Since Protobuf's `JsonFormat` serializes `uint64` fields into a JSON `string` instead of a JSON `number`, +the JSON schema element for validation of the `VcfRecord.pos` field is: + +``` +"type": "string", +"pattern": "^[1-9][0-9]*$" +``` + +instead of a more straightforward: + +``` +"type": "integer" +``` diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json new file mode 100644 index 00000000..8e7dcd02 --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json @@ -0,0 +1,470 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://www.ga4gh.org/phenopackets/vrs-facade", + "title": "VRS Variation facade", + "description": "An adapter for the VRS Variation representation that is embedded into Phenopacket schema. 
Note that the adapter does not map 1:1 to VRS Variation.", + "type": "object", + "properties": { + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "copyNumber": { + "$ref": "#/definitions/CopyNumber" + }, + "text": { + "$ref": "#/definitions/Text" + }, + "variationSet": { + "$ref": "#/definitions/VariationSet" + } + }, + "oneOf": [ + { "required": [ "allele" ]}, + { "required": [ "haplotype" ]}, + { "required": [ "copyNumber" ]}, + { "required": [ "text" ]}, + { "required": [ "variationSet"]} + ], + "additionalProperties": false, + + + "definitions": { + "Allele": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "curie": { + "type": "string" + }, + "chromosomeLocation": { + "$ref": "#/definitions/ChromosomeLocation" + }, + "sequenceLocation": { + "$ref": "#/definitions/SequenceLocation" + }, + + "sequenceState": { + "$ref": "#/definitions/SequenceState" + }, + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "repeatedSequenceExpression": { + "$ref": "#/definitions/RepeatedSequenceExpression" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["curie"] }, + { "required": ["chromosomeLocation"] }, + { "required": ["sequenceLocation"] } + ] + }, { + "oneOf": [ + { "required": ["sequenceState"] }, + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] }, + { "required": ["repeatedSequenceExpression"] } + ] + } + ], + "additionalProperties": false + }, + + "Haplotype": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "members": { + "type": "array", + "items": { + "$ref": "#/definitions/HaplotypeMember" + } + } + }, + "additionalProperties": false + }, + + "HaplotypeMember": { + "type": "object", + "description": "A utility object for representing Haplotype.Member of vrs.proto", + 
"properties": { + "allele": { + "$ref": "#/definitions/Allele" + }, + "curie": { + "type": "string" + } + }, + "oneOf": [ + { "required": ["allele"] }, + { "required": ["curie"] } + ], + "additionalProperties": false + }, + + "CopyNumber": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "gene": { + "$ref": "#/definitions/Gene" + }, + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "repeatedSequenceExpression": { + "$ref": "#/definitions/RepeatedSequenceExpression" + }, + "curie": { + "type": "string" + }, + + "number": { + "$ref": "#/definitions/Number" + }, + "indefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "definiteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["allele"] }, + { "required": ["haplotype"] }, + { "required": ["gene"] }, + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] }, + { "required": ["repeatedSequenceExpression"] }, + { "required": ["curie"] } + ] + }, { + "oneOf": [ + { "required": ["number"] }, + { "required": ["indefiniteRange"] }, + { "required": ["definiteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "Text": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "definition": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "VariationSet": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "members": { + "type": "array", + "items": { + "$ref": "#/definitions/VariationSetMember" + } + } + }, + "additionalProperties": false + }, + + "VariationSetMember": { + "type": "object", + "description": "A utility object for representing VariationSet.Member of vrs.proto", + 
"properties": { + "curie": { + "type": "string" + }, + "allele": { + "$ref": "#/definitions/Allele" + }, + "haplotype": { + "$ref": "#/definitions/Haplotype" + }, + "copyNumber": { + "$ref": "#/definitions/CopyNumber" + }, + "text": { + "$ref": "#/definitions/Text" + }, + "variationSet": { + "$ref": "#/definitions/VariationSet" + } + }, + "oneOf": [ + { "required": ["curie"] }, + { "required": ["allele"] }, + { "required": ["haplotype"] }, + { "required": ["copyNumber"] }, + { "required": ["text"] }, + { "required": ["variationSet"] } + ], + "additionalProperties": false + }, + + "ChromosomeLocation": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "speciesId": { + "type": "string" + }, + "chr": { + "type": "string" + }, + "interval": { + "$ref": "#/definitions/CytobandInterval" + } + }, + "additionalProperties": false + }, + + "SequenceLocation": { + "type": "object", + "properties": { + "_id": { + "type": "string" + }, + "sequenceId": { + "type": "string" + }, + "sequenceInterval": { + "$ref": "#/definitions/SequenceInterval" + }, + "simpleInterval": { + "$ref": "#/definitions/SimpleInterval" + } + }, + "oneOf": [ + { "required": ["sequenceInterval"] }, + { "required": ["simpleInterval"] } + ], + "additionalProperties": false + }, + + "SequenceInterval": { + "type": "object", + "properties": { + "startNumber": { + "$ref": "#/definitions/Number" + }, + "startIndefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "startDefiniteRange": { + "$ref": "#/definitions/DefiniteRange" + }, + "endNumber": { + "$ref": "#/definitions/Number" + }, + "endIndefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "endDefiniteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["startNumber"] }, + { "required": ["startIndefiniteRange"] }, + { "required": ["startDefiniteRange"] } + ] + }, { + "oneOf": [ + { "required": ["endNumber"] }, + { "required": ["endIndefiniteRange"] }, 
+ { "required": ["endDefiniteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "SimpleInterval": { + "type": "object", + "properties": { + "start": { + "$ref": "#/definitions/UnsignedInt64" + }, + "end": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "CytobandInterval": { + "type": "object", + "properties": { + "start": { + "type": "string" + }, + "end": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "Gene": { + "type": "object", + "properties": { + "geneId": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "Number": { + "type": "object", + "properties": { + "value": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "IndefiniteRange": { + "type": "object", + "properties": { + "value": { + "$ref": "#/definitions/UnsignedInt64" + }, + "comparator": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "DefiniteRange": { + "type": "object", + "properties": { + "min": { + "$ref": "#/definitions/UnsignedInt64" + }, + "max": { + "$ref": "#/definitions/UnsignedInt64" + } + }, + "additionalProperties": false + }, + + "SequenceState": { + "type": "object", + "properties": { + "sequence": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "LiteralSequenceExpression": { + "type": "object", + "properties": { + "sequence": { + "type": "string" + } + }, + "additionalProperties": false + }, + + "DerivedSequenceExpression": { + "type": "object", + "properties": { + "location": { + "$ref": "#/definitions/SequenceLocation" + }, + "reverseComplement": { + "type": "boolean" + } + }, + "additionalProperties": false + }, + + "RepeatedSequenceExpression": { + "type": "object", + "properties": { + "literalSequenceExpression": { + "$ref": "#/definitions/LiteralSequenceExpression" + }, + "derivedSequenceExpression": { + "$ref": "#/definitions/DerivedSequenceExpression" + }, + "number": { + "$ref": 
"#/definitions/Number" + }, + "indefiniteRange": { + "$ref": "#/definitions/IndefiniteRange" + }, + "definiteRange": { + "$ref": "#/definitions/DefiniteRange" + } + }, + "allOf": [ + { + "oneOf": [ + { "required": ["literalSequenceExpression"] }, + { "required": ["derivedSequenceExpression"] } + ] + }, { + "oneOf": [ + { "required": ["number"] }, + { "required": ["indefiniteRange"] }, + { "required": ["definiteRange"] } + ] + } + ], + "additionalProperties": false + }, + + "UnsignedInt64": { + "type": "string", + "pattern": "^[0-9]+$", + "description": "A utility to represent Protobuf `uint64` data type" + } + } +} \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json deleted file mode 100644 index ca2366be..00000000 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json +++ /dev/null @@ -1,980 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "GA4GH-VRS-Definitions", - "type": "object", - "definitions": { - "Variation": { - "description": "The root class of all Variation types", - "oneOf": [ - { - "$ref": "#/definitions/MolecularVariation" - }, - { - "$ref": "#/definitions/SystemicVariation" - }, - { - "$ref": "#/definitions/UtilityVariation" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "MolecularVariation": { - "description": "A variation on a contiguous molecule.", - "oneOf": [ - { - "$ref": "#/definitions/Allele" - }, - { - "$ref": "#/definitions/Haplotype" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "UtilityVariation": { - "description": "Utility variation classes that cannot be constrained to a specific biological class of variation.", - "oneOf": [ - { - "$ref": "#/definitions/Text" - }, - 
{ - "$ref": "#/definitions/VariationSet" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "SystemicVariation": { - "description": "A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.", - "oneOf": [ - { - "$ref": "#/definitions/Abundance" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Allele": { - "description": "The sequence state at a Location.", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Allele" - ], - "default": "Allele" - }, - "location": { - "oneOf": [ - { - "$ref": "#/definitions/CURIE" - }, - { - "$ref": "#/definitions/Location" - } - ] - }, - "state": { - "oneOf": [ - { - "$ref": "#/definitions/SequenceState" - }, - { - "$ref": "#/definitions/SequenceExpression" - } - ] - } - }, - "required": [ - "type", - "location", - "state" - ] - }, - "Haplotype": { - "description": "A set of zero or more Alleles", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Haplotype" - ], - "default": "Haplotype" - }, - "members": { - "type": "array", - "minItems": 1, - "uniqueItems": true, - "items": { - "oneOf": [ - { - "$ref": "#/definitions/Allele" - }, - { - "$ref": "#/definitions/CURIE" - } - ] - } - } - }, - "required": [ - "type", - "members" - ] - }, - "Text": { - "description": "A textual description of variation, typically not parseable but understood by humans.", - "additionalProperties": false, - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "Text" - ], - "default": "Text" - }, - "definition": { - "type": "string", - "description": "An textual representation of variation intended to capture variation descriptions that cannot be parsed, but still treated 
as variation." - } - }, - "required": [ - "type", - "definition" - ] - }, - "VariationSet": { - "description": "A set of Variation objects.\nMembers may be specified inline or by reference (with CURIEs)", - "type": "object", - "additionalProperties": false, - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "VariationSet" - ], - "default": "VariationSet" - }, - "members": { - "type": "array", - "uniqueItems": true, - "items": { - "oneOf": [ - { - "$ref": "#/definitions/CURIE" - }, - { - "$ref": "#/definitions/Variation" - } - ] - } - } - }, - "required": [ - "type", - "members" - ] - }, - "Abundance": { - "description": "The quantity of a feature, variation, molecule or part thereof in a system.", - "oneOf": [ - { - "$ref": "#/definitions/CopyNumber" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "CopyNumber": { - "additionalProperties": false, - "type": "object", - "description": "The count of copies of a Feature, Location, or Molecular Variation subject within a genome.", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "CopyNumber" - ], - "default": "CopyNumber" - }, - "subject": { - "oneOf": [ - { - "$ref": "#/definitions/MolecularVariation" - }, - { - "$ref": "#/definitions/Feature" - }, - { - "$ref": "#/definitions/SequenceExpression" - }, - { - "$ref": "#/definitions/CURIE" - } - ] - }, - "copies": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "copies": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - 
"properties": { - "copies": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "copies": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "copies": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "subject", - "copies" - ] - }, - "Location": { - "description": "A Location represents a span on a specific sequence.", - "oneOf": [ - { - "$ref": "#/definitions/ChromosomeLocation" - }, - { - "$ref": "#/definitions/SequenceLocation" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "ChromosomeLocation": { - "additionalProperties": false, - "description": "A region of a chromosomed specified by species and name using cytogenetic naming conventions", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "ChromosomeLocation" - ], - "default": "ChromosomeLocation" - }, - "_id": { - "$ref": "#/definitions/CURIE" - }, - "species_id": { - "$ref": "#/definitions/CURIE", - "default": "taxonomy:9606" - }, - "chr": { - "type": "string" - }, - "interval": { - "$ref": "#/definitions/CytobandInterval" - } - }, - "required": [ - "type", - "species_id", - "chr", - "interval" - ] - }, - "SequenceLocation": { - "additionalProperties": false, - "description": "A specified subsequence within another sequence that is used as a reference sequence.", - "type": "object", - "properties": { - "_id": { - "$ref": "#/definitions/CURIE" - }, - "type": { - "type": "string", - "enum": [ - "SequenceLocation" - ], - "default": "SequenceLocation" - }, - "sequence_id": { - "$ref": "#/definitions/CURIE" - }, - "interval": { - "oneOf": [ - { - "$ref": "#/definitions/SequenceInterval" - }, - { - "$ref": "#/definitions/SimpleInterval" - } - ] - } - }, - "required": [ - "type", - "sequence_id", - "interval" - ] - }, - 
"SequenceInterval": { - "description": "A SequenceInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates.\nSequenceInterval is intended to be compatible with that in Sequence Ontology ([SO:0000001](http://www.sequenceontology.org/browser/current_svn/term/SO:0000001)), with the exception that the GA4GH VRS SequenceInterval may be zero-width. The SO definition is for an \"extent greater than zero\".", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "SequenceInterval" - ], - "default": "SequenceInterval" - }, - "start": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - }, - "end": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "start": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "start": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "end": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - 
"properties": { - "end": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "end": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "end": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "start", - "end" - ] - }, - "CytobandInterval": { - "description": "A contiguous region specified by chromosomal bands features.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "CytobandInterval" - ], - "default": "CytobandInterval" - }, - "start": { - "$ref": "#/definitions/HumanCytoband" - }, - "end": { - "$ref": "#/definitions/HumanCytoband" - } - }, - "example": { - "type": "CytobandInterval", - "start": "q22.2", - "end": "q22.3" - }, - "required": [ - "type", - "start", - "end" - ] - }, - "SequenceExpression": { - "description": "One of a set of sequence representation syntaxes.", - "oneOf": [ - { - "$ref": "#/definitions/LiteralSequenceExpression" - }, - { - "$ref": "#/definitions/DerivedSequenceExpression" - }, - { - "$ref": "#/definitions/RepeatedSequenceExpression" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "LiteralSequenceExpression": { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "LiteralSequenceExpression" - ], - "default": "LiteralSequenceExpression" - }, - "sequence": { - "$ref": "#/definitions/Sequence" - } - }, - "required": [ - "type", - "sequence" - ] - }, - "DerivedSequenceExpression": { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "DerivedSequenceExpression" - ], - "default": 
"DerivedSequenceExpression" - }, - "location": { - "$ref": "#/definitions/SequenceLocation" - }, - "reverse_complement": { - "type": "boolean" - } - }, - "required": [ - "type", - "location", - "reverse_complement" - ] - }, - "RepeatedSequenceExpression": { - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "RepeatedSequenceExpression" - ], - "default": "RepeatedSequenceExpression" - }, - "seq_expr": { - "oneOf": [ - { - "$ref": "#/definitions/LiteralSequenceExpression" - }, - { - "$ref": "#/definitions/DerivedSequenceExpression" - } - ] - }, - "count": { - "oneOf": [ - { - "$ref": "#/definitions/Number" - }, - { - "$ref": "#/definitions/IndefiniteRange" - }, - { - "$ref": "#/definitions/DefiniteRange" - } - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/Number" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/IndefiniteRange" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "value": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - }, - { - "if": { - "properties": { - "count": { - "$ref": "#/definitions/DefiniteRange" - } - } - }, - "then": { - "properties": { - "count": { - "properties": { - "min": { - "minimum": 0, - "type": "integer" - }, - "max": { - "minimum": 0, - "type": "integer" - } - } - } - } - } - } - ], - "required": [ - "type", - "seq_expr", - "count" - ] - }, - "Feature": { - "description": "A named entity that can be mapped to a Location. 
Genes, protein domains, exons, and chromosomes are some examples of common biological entities that may be Features.", - "oneOf": [ - { - "$ref": "#/definitions/Gene" - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "Gene": { - "description": "A reference to an external gene system, used as a location for variation. Currently, the `ncbigene` namespace is required. See https://registry.identifiers.org/registry/ncbigene.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "Gene" - ], - "default": "Gene" - }, - "gene_id": { - "$ref": "#/definitions/CURIE" - } - }, - "required": [ - "type", - "gene_id" - ] - }, - "Number": { - "description": "A simple number value as a VRS class.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "Number" - ], - "default": "Number" - }, - "value": { - "type": "number" - } - }, - "required": [ - "type", - "value" - ] - }, - "IndefiniteRange": { - "description": "An indefinite range represented as a number and associated comparator. 
The bound operator is interpreted as follows: '>=' are all values greater than and including the value, '<=' are all numbers less than and including the value.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "IndefiniteRange" - ], - "default": "IndefiniteRange" - }, - "value": { - "type": "number" - }, - "comparator": { - "type": "string", - "enum": [ - "<=", - ">=" - ] - } - }, - "required": [ - "type", - "value", - "comparator" - ] - }, - "DefiniteRange": { - "description": "A bounded, inclusive range of numbers.", - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "type": "string", - "enum": [ - "DefiniteRange" - ], - "default": "DefiniteRange" - }, - "min": { - "type": "number" - }, - "max": { - "type": "number" - } - }, - "required": [ - "type", - "min", - "max" - ] - }, - "Sequence": { - "additionalProperties": false, - "description": "A character string of residues that represents a biological sequence using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity codes are permitted in Sequences.", - "type": "string", - "pattern": "^[A-Z*\\-]*$" - }, - "CURIE": { - "additionalProperties": false, - "description": "A string that refers to an object uniquely. The lifetime and scope of an id is defined by the sender.\nVRS does not impose any contraints on strings used as ids in messages. However, to maximize sharability of data, VRS RECOMMENDS that implementations use [W3C Compact URI (CURIE)](https://www.w3.org/TR/curie/) syntax.\nString CURIEs are represented as `prefix`:`reference` (W3C terminology), but often referred to as `namespace`:`accession` or `namespace`:`local id` colloquially.\nVRS also RECOMMENDS that `prefix` be defined in identifiers.org.\nThe `reference` component is an unconstrained string.\nA CURIE is a URI. 
URIs may *locate* objects (i.e., specify where to retrieve them) or *name* objects conceptually. VRS uses CURIEs primarily as a naming mechanism.\nImplementations MAY provide CURIE resolution mechanisms for prefixes to make these objects locatable.\nUsing internal ids in public messages is strongly discouraged.", - "type": "string", - "pattern": "^\\w[^:]*:.+$", - "example": "ensembl:ENSG00000139618" - }, - "HumanCytoband": { - "additionalProperties": false, - "description": "A interval on a stained metaphase chromosome specified by cytobands. CytobandIntervals include the regions described by the start and end cytobands.", - "type": "string", - "pattern": "^cen|[pq](ter|([1-9][0-9]*(\\.[1-9][0-9]*)?))$", - "example": "q22.3" - }, - "SequenceState": { - "deprecated": true, - "description": "DEPRECATED: An assertion of the state of a sequence, typically at a Sequence Location within an Allele.\nThis class is deprecated. Use LiteralSequenceExpression instead.", - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "SequenceState" - ], - "default": "SequenceState" - }, - "sequence": { - "$ref": "#/definitions/Sequence" - } - }, - "example": { - "type": "SequenceState", - "sequence": "C" - }, - "required": [ - "type", - "sequence" - ] - }, - "SimpleInterval": { - "deprecated": true, - "description": "DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates.\nThis class is deprecated. 
Use SequenceInterval instead.", - "additionalProperties": false, - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "SimpleInterval" - ], - "default": "SimpleInterval" - }, - "start": { - "type": "integer" - }, - "end": { - "type": "integer" - } - }, - "example": { - "type": "SimpleInterval", - "start": 11, - "end": 22 - }, - "required": [ - "type", - "start", - "end" - ] - } - } -} \ No newline at end of file diff --git a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json index 1af561e1..840000a4 100644 --- a/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json +++ b/phenopacket-tools-validator-jsonschema/src/main/resources/org/phenopackets/phenopackettools/validator/jsonschema/vrsatile.json @@ -1,6 +1,6 @@ { - "$schema": "https://json-schema.org/draft/2019-09/schema#", - "$id": "https://www.ga4gh.org/phenopackets", + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://www.ga4gh.org/phenopackets/vrsatile", "title": "VRS Added Tools for Interoperable Loquacious Exchange", "description": "VRSATILE: A set of proposed extensions for GA4GH's Variation Representation Specification (VRS) to enable interoperable exchange of common descriptive data alongside variation concepts", "type": "object", @@ -58,7 +58,8 @@ }, "pos" : { "description": "position on the chromosome (VCF convention)", - "type": "integer" + "type": "string", + "pattern": "^[1-9][0-9]*$" }, "id" : { "description": "identifier as used in VCF line", @@ -97,8 +98,8 @@ "type": "string" }, "variation": { - "$ref": "classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs.json#/definitions/Variation", - "description": "The VRS Variation object" + "$ref": 
"classpath:/org/phenopackets/phenopackettools/validator/jsonschema/vrs-variation-adapter.json", + "description": "An adapter for the VRS Variation representation that is embedded into Phenopacket schema. Note that the adapter does not map 1:1 to VRS Variation." }, "label": { "type": "string", diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java index cf885756..526ef33d 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/JsonSchemaValidationWorkflowRunnerTest.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import java.nio.file.Path; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -223,14 +224,34 @@ public void checkVariantInterpretationConstraints(String path, String action, St testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); } -// TODO - implement tests -// @ParameterizedTest -// @CsvSource({ -// "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor: is missing but it is required'", -// }) -// public void checkVariationDescriptorConstraints(String path, String action, String expected) { -// testErrors(runner, readBethlemPhenopacketNode(), path, action, expected); -// } + @ParameterizedTest + @CsvSource({ + 
"/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/id, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.id: is missing but it is required'", + "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor/moleculeContext, DELETE, '$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.moleculeContext: is missing but it is required'", + }) + public void checkVariationDescriptorConstraints(String path, String action, String expected) { + testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expected); + } + + /** + * As of Nov 9, 2022, the {@link org.ga4gh.vrs.v1.Variation} validator does not check presence + * of required fields. The validator can only check presence of {@code oneof} fields. + *

+ * Note that the {@code path} is split into a prefix and sub-path to increase legibility of the + * test parameters. + */ + @ParameterizedTest + @CsvSource({ + "/variation/copyNumber, DELETE, 'HERE.allele: is missing but it is required|HERE.haplotype: is missing but it is required|HERE.copyNumber: is missing but it is required|HERE.text: is missing but it is required|HERE.variationSet: is missing but it is required'", + }) + public void removingAOneOfFieldFromVariationProducesValidationError(String subPath, String action, String subExpected) { + String pathPrefix = "/interpretations[0]/diagnosis/genomicInterpretations[0]/variantInterpretation/variationDescriptor"; + String path = pathPrefix.concat(subPath); + + String validationMessagePrefix = "\\$.interpretations[0].diagnosis.genomicInterpretations[0].variantInterpretation.variationDescriptor.variation"; + String expectedValidationMessage = subExpected.replaceAll("HERE", validationMessagePrefix); + testErrors(runner, readRetinoblastomaPhenopacketNode(), path, action, expectedValidationMessage); + } /** * Absence of `term` leads to an {@link org.phenopackets.phenopackettools.validator.core.ValidationLevel#ERROR}. 
@@ -372,7 +393,15 @@ public class RecommendedFieldsTest { } private static JsonNode readBethlemPhenopacketNode() { - try (InputStream is = Files.newInputStream(TestData.BETHLEM_MYOPATHY_PHENOPACKET_JSON)){ + return readJsonTree(TestData.BETHLEM_MYOPATHY_PHENOPACKET_JSON); + } + + private static JsonNode readRetinoblastomaPhenopacketNode() { + return readJsonTree(TestData.RETINOBLASTOMA_PHENOPACKET_JSON); + } + + private static JsonNode readJsonTree(Path jsonPath) { + try (InputStream is = Files.newInputStream(jsonPath)){ return MAPPER.readTree(is); } catch (IOException e) { throw new RuntimeException(e); diff --git a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java index 94356a47..615e29f2 100644 --- a/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java +++ b/phenopacket-tools-validator-jsonschema/src/test/java/org/phenopackets/phenopackettools/validator/jsonschema/TestData.java @@ -18,6 +18,12 @@ public class TestData { */ public static final Path BETHLEM_MYOPATHY_PHENOPACKET_JSON = TEST_BASE_DIR.resolve("bethlem-myopathy.json"); + /** + * A path to an example phenopacket representing a case of retinoblastoma. The phenopacket is useful since + * it contains a VRS-like Variation object. 
+ */ + public static final Path RETINOBLASTOMA_PHENOPACKET_JSON = TEST_BASE_DIR.resolve("retinoblastoma.json"); + /** * A path to an example family that, despite being medically invalid/nonsense, is complete from the testing diff --git a/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json new file mode 100644 index 00000000..ca1fabdb --- /dev/null +++ b/phenopacket-tools-validator-jsonschema/src/test/resources/org/phenopackets/phenopackettools/validator/jsonschema/retinoblastoma.json @@ -0,0 +1,463 @@ +{ + "id": "arbitrary.id", + "subject": { + "id": "proband A", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P6M" + } + }, + "sex": "FEMALE", + "karyotypicSex": "XX" + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0030084", + "label": "Clinodactyly" + }, + "modifiers": [{ + "id": "HP:0012834", + "label": "Right" + }], + "onset": { + "age": { + "iso8601duration": "P3M" + } + } + }, { + "type": { + "id": "HP:0000555", + "label": "Leukocoria" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P4M" + } + } + }, { + "type": { + "id": "HP:0000486", + "label": "Strabismus" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P5M15D" + } + } + }, { + "type": { + "id": "HP:0000541", + "label": "Retinal detachment" + }, + "modifiers": [{ + "id": "HP:0012835", + "label": "Left" + }], + "onset": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:79893-4", + "label": "Left eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 25.0, + "referenceRange": { + "unit": { + "id": 
"56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }, { + "assay": { + "id": "LOINC:79892-6", + "label": "Right eye Intraocular pressure" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm[Hg]", + "label": "millimetres of mercury" + }, + "value": 15.0, + "referenceRange": { + "unit": { + "id": "56844-4", + "label": "Intraocular pressure of Eye" + }, + "low": 10.0, + "high": 21.0 + } + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P6M" + } + } + }], + "biosamples": [{ + "id": "biosample.1", + "sampledTissue": { + "id": "UBERON:0000970", + "label": "eye" + }, + "phenotypicFeatures": [{ + "type": { + "id": "NCIT:C35941", + "label": "Flexner-Wintersteiner Rosette Formation" + } + }, { + "type": { + "id": "NCIT:C132485", + "label": "Apoptosis and Necrosis" + } + }], + "measurements": [{ + "assay": { + "id": "LOINC:33728-7", + "label": "Size.maximum dimension in Tumor" + }, + "value": { + "quantity": { + "unit": { + "id": "UCUM:mm", + "label": "millimeter" + }, + "value": 15.0 + } + }, + "timeObserved": { + "age": { + "iso8601duration": "P8M2W" + } + } + }], + "tumorProgression": { + "id": "NCIT:C8509", + "label": "Primary Neoplasm" + }, + "pathologicalTnmFinding": [{ + "id": "NCIT:C140720", + "label": "Retinoblastoma pT3 TNM Finding v8" + }, { + "id": "NCIT:C140711", + "label": "Retinoblastoma pN0 TNM Finding v8" + }], + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "files": [{ + "uri": "file://data/fileSomaticWgs.vcf.gz", + "individualToFileIdentifiers": { + "biosample.1": "specimen.1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }] + }], + "interpretations": [{ + "id": "interpretation.id", + "progressStatus": 
"SOLVED", + "diagnosis": { + "disease": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "genomicInterpretations": [{ + "subjectOrBiosampleId": "proband A", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "example-cnv", + "moleculeContext": "genomic", + "variation": { + "copyNumber": { + "derivedSequenceExpression": { + "location": { + "sequenceId": "refseq:NC_000013.14", + "sequenceInterval": { + "startNumber": { + "value": "25981249" + }, + "endNumber": { + "value": "61706822" + } + } + } + }, + "number": { + "value": "1" + } + } + }, + "extensions": [{ + "name": "mosaicism", + "value": "40.0%" + }] + } + } + }, { + "subjectOrBiosampleId": "biosample.1", + "interpretationStatus": "CAUSATIVE", + "variantInterpretation": { + "acmgPathogenicityClassification": "PATHOGENIC", + "therapeuticActionability": "ACTIONABLE", + "variationDescriptor": { + "id": "rs121913300", + "variation": { + "allele": { + "sequenceLocation": { + "sequenceId": "refseq:NC_000013.11", + "sequenceInterval": { + "startNumber": { + "value": "48367511" + }, + "endNumber": { + "value": "48367512" + } + } + }, + "literalSequenceExpression": { + "sequence": "T" + } + } + }, + "label": "RB1 c.958C\u003eT (p.Arg320Ter)", + "geneContext": { + "valueId": "HGNC:9884", + "symbol": "RB1" + }, + "expressions": [{ + "syntax": "hgvs.c", + "value": "NM_000321.2:c.958C\u003eT" + }, { + "syntax": "transcript_reference", + "value": "NM_000321.2" + }], + "vcfRecord": { + "genomeAssembly": "GRCh38", + "chrom": "NC_000013.11", + "pos": "48367512", + "ref": "C", + "alt": "T" + }, + "extensions": [{ + "name": "allele-frequency", + "value": "25.0%" + }], + "moleculeContext": "genomic", + "allelicState": { + "id": "GENO:0000135", + "label": "heterozygous" + } + } + } + }] + } + }], + "diseases": [{ + "term": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + 
}, + "onset": { + "age": { + "iso8601duration": "P4M" + } + }, + "diseaseStage": [{ + "id": "LOINC:LA24739-7", + "label": "Group E" + }], + "clinicalTnmFinding": [{ + "id": "NCIT:C140678", + "label": "Retinoblastoma cM0 TNM Finding v8" + }], + "primarySite": { + "id": "UBERON:0004548", + "label": "left eye" + } + }], + "medicalActions": [{ + "treatment": { + "agent": { + "id": "DrugCentral:1678", + "label": "melphalan" + }, + "routeOfAdministration": { + "id": "NCIT:C38222", + "label": "Intraarterial Route of Administration" + }, + "doseIntervals": [{ + "quantity": { + "unit": { + "id": "UCUM:mg.kg-1", + "label": "milligram per kilogram" + }, + "value": 0.4 + }, + "scheduleFrequency": { + "id": "NCIT:C64576", + "label": "Once" + }, + "interval": { + "start": "2020-09-02T00:00:00Z", + "end": "2020-09-02T00:00:00Z" + } + }] + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + }, + "adverseEvents": [{ + "id": "HP:0025637", + "label": "Vasospasm" + }], + "treatmentTerminationReason": { + "id": "NCIT:C41331", + "label": "Adverse Event" + } + }, { + "therapeuticRegimen": { + "ontologyClass": { + "id": "NCIT:C10894", + "label": "Carboplatin/Etoposide/Vincristine" + }, + "startTime": { + "age": { + "iso8601duration": "P7M" + } + }, + "endTime": { + "age": { + "iso8601duration": "P8M" + } + }, + "regimenStatus": "COMPLETED" + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }, { + "procedure": { + "code": { + "id": "NCIT:C48601", + "label": "Enucleation" + }, + "bodySite": { + "id": "UBERON:0004548", + "label": "left eye" + }, + "performed": { + "age": { + "iso8601duration": "P8M2W" + } + } + }, + "treatmentTarget": { + "id": "NCIT:C7541", + "label": "Retinoblastoma" + }, + "treatmentIntent": { + "id": "NCIT:C62220", + "label": "Cure" + } + }], + "files": [{ + "uri": 
"file://data/germlineWgs.vcf.gz", + "individualToFileIdentifiers": { + "proband A": "sample1" + }, + "fileAttributes": { + "genomeAssembly": "GRCh38", + "fileFormat": "VCF" + } + }], + "metaData": { + "created": "2021-05-14T10:35:00Z", + "createdBy": "anonymous biocurator", + "resources": [{ + "id": "ncit", + "name": "NCI Thesaurus", + "url": "http://purl.obolibrary.org/obo/ncit.owl", + "version": "21.05d", + "namespacePrefix": "NCIT", + "iriPrefix": "http://purl.obolibrary.org/obo/NCIT_" + }, { + "id": "efo", + "name": "Experimental Factor Ontology", + "url": "http://www.ebi.ac.uk/efo/efo.owl", + "version": "3.34.0", + "namespacePrefix": "EFO", + "iriPrefix": "http://purl.obolibrary.org/obo/EFO_" + }, { + "id": "uberon", + "name": "Uber-anatomy ontology", + "url": "http://purl.obolibrary.org/obo/uberon.owl", + "version": "2021-07-27", + "namespacePrefix": "UBERON", + "iriPrefix": "http://purl.obolibrary.org/obo/UBERON_" + }, { + "id": "ncbitaxon", + "name": "NCBI organismal classification", + "url": "http://purl.obolibrary.org/obo/ncbitaxon.owl", + "version": "2021-06-10", + "namespacePrefix": "NCBITaxon", + "iriPrefix": "http://purl.obolibrary.org/obo/NCBITaxon_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} \ No newline at end of file