From 1f0275d7b0b22b9f8fbb3a84b0d32147b9470177 Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Sat, 2 Nov 2024 23:58:31 +0200 Subject: [PATCH 1/2] Add many examples to schema and update some descriptions --- src/sssom_schema/schema/sssom_schema.yaml | 139 +++++++++++++++++----- 1 file changed, 112 insertions(+), 27 deletions(-) diff --git a/src/sssom_schema/schema/sssom_schema.yaml b/src/sssom_schema/schema/sssom_schema.yaml index 099e8928..3001242f 100644 --- a/src/sssom_schema/schema/sssom_schema.yaml +++ b/src/sssom_schema/schema/sssom_schema.yaml @@ -130,7 +130,7 @@ slots: description: A URL to a homepage of this mapping commons. range: uri mappings: - description: Contains a list of mapping objects + description: Contains a list of mapping objects. range: mapping multivalued: true inlined_as_list: true @@ -145,7 +145,7 @@ slots: - value: HP:0009894 description: The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears' subject_label: - description: The label of subject of the mapping + description: The label of subject of the mapping. range: string examples: - value: Thickened ears @@ -164,11 +164,11 @@ slots: - https://github.com/mapping-commons/sssom/issues/256 examples: - value: UBERON:0001062 - description: (The CURIE of the Uberon term for "anatomical entity".) + description: The CURIE of the Uberon term for "anatomical entity". - value: anatomical entity - description: (A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID.) + description: A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID. - value: biolink:Gene - description: (The CURIE of the biolink class for genes.) + description: The CURIE of the biolink class for genes. subject_type: description: The type of entity that is being mapped. 
range: entity_type_enum @@ -185,7 +185,6 @@ slots: range: EntityReference required: true slot_uri: owl:annotatedProperty - examples: - value: owl:sameAs description: The subject and the object are instances (owl individuals), and the two instances are the same. @@ -208,7 +207,7 @@ slots: - value: skos:broadMatch description: "From the SKOS primer: A triple skos:broader (and skos:broadMatch) asserts that , the object of the triple, is a broader concept than , the subject of the triple." - value: oboInOwl:hasDbXref - description: Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go) + description: Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go - value: rdfs:seeAlso description: The subject and the object are associated in some unspecified way. The object IRI often resolves to a resource on the web that provides additional information. predicate_modifier: @@ -220,7 +219,7 @@ slots: - value: Not description: Negates the predicate, see documentation of predicate_modifier_enum predicate_label: - description: The label of the predicate/relation of the mapping + description: The label of the predicate/relation of the mapping. range: string examples: - value: has cross-reference @@ -239,9 +238,9 @@ slots: slot_uri: owl:annotatedTarget examples: - value: HP:0009894 - description: The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears' + description: The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears'. object_label: - description: The label of object of the mapping + description: The label of object of the mapping. 
range: string examples: - value: Thickened ears @@ -260,11 +259,11 @@ slots: - https://github.com/mapping-commons/sssom/issues/256 examples: - value: UBERON:0001062 - description: (The CURIE of the Uberon term for "anatomical entity".) + description: The CURIE of the Uberon term for "anatomical entity". - value: anatomical entity - description: (A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID.) + description: A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID. - value: biolink:Gene - description: (The CURIE of the biolink class for genes.) + description: The CURIE of the biolink class for genes. mapping_justification: description: A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable. range: EntityReference @@ -298,18 +297,18 @@ slots: range: uri examples: - value: http://purl.obolibrary.org/obo/mondo/mappings/mondo_exactmatch_ncit.sssom.tsv - description: (A persistent URI pointing to the latest version of the Mondo - NCIT mapping in the Mondo namespace.) + description: A persistent URI pointing to the latest version of the Mondo - NCIT mapping in the Mondo namespace. mapping_set_version: description: A version string for the mapping. range: string slot_uri: owl:versionInfo examples: - value: "2020-01-01" - description: (A date-based version that indicates that the mapping was published on the 1st January in 2021.) + description: A date-based version that indicates that the mapping was published on the 1st January in 2020. - value: "1.2.1" description: "(A semantic version tag that indicates that this is the 1st major, 2nd minor version, patch 1 (https://semver.org/).)" mapping_set_group: - description: Set by the owners of the mapping registry. A way to group . + description: Set by the owners of the mapping registry. 
A way to group related mapping sets for example for UI purposes. range: string mapping_set_title: description: The display name of a mapping set. @@ -333,11 +332,21 @@ slots: slot_uri: dcterms:creator range: EntityReference multivalued: true + examples: + - value: orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165 + description: The ORCID of the (multiple) creators of the mapping. + - value: orcid:0000-0002-7356-1779 + description: The ORCID of the creator of the mapping. creator_label: description: A string identifying the creator of this mapping. In the spirit of provenance, consider using creator_id instead. range: string multivalued: true + examples: + - value: Nicolas Matentzoglu|Chris Mungall + description: The human-readable names of the (multiple) creators of the mapping. + - value: Nicolas Matentzoglu + description: The human-readable name of the creator of the mapping. author_id: description: Identifies the persons or groups responsible for asserting the mappings. Recommended to be a list of ORCIDs or otherwise @@ -345,27 +354,50 @@ slots: slot_uri: pav:authoredBy range: EntityReference multivalued: true + examples: + - value: orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165 + description: The ORCID of the (multiple) authors of the mapping. + - value: orcid:0000-0002-7356-1779 + description: The ORCID of the author of the mapping. author_label: description: A string identifying the author of this mapping. In the spirit of provenance, consider using author_id instead. range: string multivalued: true + examples: + - value: Nicolas Matentzoglu|Chris Mungall + description: The human-readable names of the (multiple) authors of the mapping. + - value: Nicolas Matentzoglu + description: The human-readable name of the author of the mapping. reviewer_id: description: Identifies the persons or groups that reviewed and confirmed the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs. 
range: EntityReference multivalued: true + examples: + - value: orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165 + description: The ORCID of the (multiple) reviewers of the mapping. + - value: orcid:0000-0002-7356-1779 + description: The ORCID of the reviewer of the mapping. reviewer_label: description: A string identifying the reviewer of this mapping. In the spirit of provenance, consider using reviewer_id instead. range: string multivalued: true + examples: + - value: Nicolas Matentzoglu|Chris Mungall + description: The human-readable names of the (multiple) reviewers of the mapping. + - value: Nicolas Matentzoglu + description: The human-readable name of the reviewer of the mapping. license: description: A url to the license of the mapping. In absence of a license we assume no license. range: uri slot_uri: dcterms:license + examples: + - value: https://creativecommons.org/licenses/by/4.0/ + description: The URI of the Creative Commons Attribution 4.0 International license. subject_source: description: URI of vocabulary or identifier source for the subject. range: EntityReference @@ -385,7 +417,7 @@ slots: propagated: true examples: - value: http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl - description: (A persistent Version IRI pointing to the Mondo version '2021-01-30') + description: A persistent Version IRI pointing to the Mondo version '2021-01-30' object_source: description: URI of vocabulary or identifier source for the object. range: EntityReference @@ -405,7 +437,7 @@ slots: propagated: true examples: - value: http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl - description: (A persistent Version IRI pointing to the Mondo version '2021-01-30') + description: A persistent Version IRI pointing to the Mondo version '2021-01-30' mapping_provider: description: URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived. 
@@ -413,6 +445,11 @@ slots: instantiates: sssom:Propagatable annotations: propagated: true + examples: + - value: https://www.ohdsi.org/ + description: A URL pointing to the Observational Health Data Sciences and Informatics initiative. + - value: https://monarchinitiative.org/ + description: A URL pointing to the Monarch Initiative Resource. mapping_set_source: description: A mapping set or set of mapping set that was used to derive the mapping set. slot_uri: prov:wasDerivedFrom @@ -427,12 +464,18 @@ slots: range: EntityReference examples: - value: MONDO_MAPPINGS:mondo_exactmatch_ncit.sssom.tsv + description: A reference to the mapping set that originally contained this mapping. mapping_cardinality: description: A string indicating whether this mapping is from a 1:1 (the subject_id maps to a single object_id), 1:n (the subject maps to more than one object_id), n:1, 1:0, 0:1 or n:n group. Note that this is a convenience field that should be derivable from the mapping set. range: mapping_cardinality_enum + examples: + - value: "1:1" + description: A one-to-one mapping. + - value: "1:n" + description: A one-to-many mapping. mapping_tool: description: A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. @@ -442,6 +485,9 @@ slots: propagated: true examples: - value: https://github.com/AgreementMakerLight/AML-Project + description: A URL pointing to the AgreementMakerLight project. + - value: AgreementMakerLight + description: A string (name) denoting the AgreementMakerLight project. mapping_tool_version: description: Version string that denotes the version of the mapping tool used. range: string @@ -455,18 +501,25 @@ slots: slot_uri: pav:authoredOn range: date instantiates: sssom:Propagatable + examples: + - value: 2021-01-01 annotations: propagated: true publication_date: description: The date the mapping was published. This is different from the date the mapping was asserted. 
slot_uri: dcterms:created range: date + examples: + - value: 2021-01-01 confidence: description: A score between 0 and 1 to denote the confidence or probability that the match is correct, where 1 denotes total confidence. range: double minimum_value: 0.0 maximum_value: 1.0 + examples: + - value: 0.95 + description: A confidence score of 0.95, indicating 95% confidence. subject_match_field: description: A list of properties (term annotations on the subject) that was used for the match. @@ -475,6 +528,11 @@ slots: instantiates: sssom:Propagatable annotations: propagated: true + examples: + - value: rdfs:label + description: "The RDFS label property (rdfs:label) was used to match the subject." + - value: skos:prefLabel + description: "The SKOS preferred label property (skos:prefLabel) was used to match the subject." object_match_field: description: A list of properties (term annotations on the object) that was used for the match. @@ -483,11 +541,19 @@ slots: instantiates: sssom:Propagatable annotations: propagated: true + examples: + - value: rdfs:label + description: "The RDFS label property (rdfs:label) was used to match the object." + - value: skos:prefLabel + description: "The SKOS preferred label property (skos:prefLabel) was used to match the object." match_string: description: String that is shared by subj/obj. It is recommended to indicate the fields for the match using the object and subject_match_field slots. range: string multivalued: true + examples: + - value: "gala" + description: "The 'gala' string was matched for both subject and object." subject_preprocessing: description: Method of preprocessing applied to the fields of the subject. If different preprocessing steps were performed on different fields, it is @@ -519,6 +585,9 @@ slots: The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule. 
range: EntityReference multivalued: true + examples: + - value: DISEASE_MAPPING_COMMONS_RULES:MPR2 + description: A reference to the Disease Mapping Commons rule with the ID MPR2. see_also: - https://github.com/mapping-commons/sssom/issues/166 - https://github.com/mapping-commons/sssom/pull/258 @@ -531,6 +600,9 @@ slots: perspective of the mapping_provider and (2) as an additional piece of metadata to augment the curation_rule element with a human readable text. range: string multivalued: true + examples: + - value: "The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality." + - value: "The two diseases are used synonymously in the medical literature." see_also: - https://github.com/mapping-commons/sssom/issues/166 - https://github.com/mapping-commons/sssom/pull/258 @@ -543,6 +615,9 @@ slots: range: double minimum_value: 0.0 maximum_value: 1.0 + examples: + - value: 0.95 + description: A similarity score of 0.95, indicating 95% similarity. see_also: - https://github.com/mapping-commons/sssom/issues/385 - https://github.com/mapping-commons/sssom/pull/386 @@ -556,11 +631,11 @@ slots: range: string examples: - value: https://www.wikidata.org/entity/Q865360 - description: (the Wikidata IRI for the Jaccard index measure). + description: The Wikidata IRI for the Jaccard index measure. - value: wikidata:Q865360 - description: (the Wikidata CURIE for the Jaccard index measure). + description: The Wikidata CURIE for the Jaccard index measure. - value: Levenshtein distance - description: (a score to measure the distance between two character sequences). + description: A score to measure the distance between two character sequences. 
see_also: - https://github.com/mapping-commons/sssom/issues/385 - https://github.com/mapping-commons/sssom/pull/386 @@ -570,7 +645,7 @@ slots: range: EntityReference examples: - value: SSSOM_GITHUB_ISSUE:166 - description: (A URL resolving to an issue discussing a new SSSOM element request) + description: A URL resolving to an issue discussing a new SSSOM element request see_also: - https://github.com/mapping-commons/sssom/issues/78 - https://github.com/mapping-commons/sssom/pull/259 @@ -580,7 +655,7 @@ slots: range: uri examples: - value: https://github.com/mapping-commons/mh_mapping_initiative/issues - description: (A URL resolving to the issue tracker of the Mouse-Human mapping initiative) + description: A URL resolving to the issue tracker of the Mouse-Human mapping initiative see_also: - https://github.com/mapping-commons/sssom/issues/78 - https://github.com/mapping-commons/sssom/pull/259 @@ -590,17 +665,27 @@ slots: per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment slot_uri: rdfs:seeAlso + examples: + - value: https://github.com/mapping-commons/mh_mapping_initiative/pull/41 + description: A URL pointing to the pull request that introduced the mapping. range: string multivalued: true other: - description: Pipe separated list of key value pairs for properties not part of - the SSSOM spec. Can be used to encode additional provenance data. + description: "Pipe separated list of key value pairs for properties not part of + the SSSOM spec. Can be used to encode additional provenance data. NOTE. This + field is not recommended for general use, and should be used sparingly. See + https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv + for an alternative approach based on extension slots." 
range: string comment: description: Free text field containing either curator notes or text generated by tool providing additional informative information. slot_uri: rdfs:comment range: string + examples: + - value: This mapping is weird in that the hierarchical position of the two terms + is very different. + description: A comment explaining a mapping author's reservation about a mapping. extension_definitions: description: A list that defines the extension slots used in the mapping set. range: extension definition @@ -610,7 +695,7 @@ slots: - https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv classes: mapping set: - description: Represents a set of mappings + description: Represents a set of mappings. slot_usage: license: required: true @@ -646,7 +731,7 @@ classes: - comment - extension_definitions mapping: - description: Represents an individual mapping between a pair of entities + description: Represents an individual mapping between a pair of entities. slots: - subject_id - subject_label From db7429d9bfb9f20047ee5a02558bcba9d58fe5c5 Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Mon, 4 Nov 2024 20:10:30 +0200 Subject: [PATCH 2/2] Update src/sssom_schema/schema/sssom_schema.yaml --- src/sssom_schema/schema/sssom_schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sssom_schema/schema/sssom_schema.yaml b/src/sssom_schema/schema/sssom_schema.yaml index 3001242f..7303fe0b 100644 --- a/src/sssom_schema/schema/sssom_schema.yaml +++ b/src/sssom_schema/schema/sssom_schema.yaml @@ -207,7 +207,7 @@ slots: - value: skos:broadMatch description: "From the SKOS primer: A triple skos:broader (and skos:broadMatch) asserts that , the object of the triple, is a broader concept than , the subject of the triple." - value: oboInOwl:hasDbXref - description: Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. 
Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go + description: Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go). - value: rdfs:seeAlso description: The subject and the object are associated in some unspecified way. The object IRI often resolves to a resource on the web that provides additional information. predicate_modifier: