Skip to content

Commit

Permalink
Merge pull request #173 from BIH-CEI/170-change-datafield-to-work-wit…
Browse files Browse the repository at this point in the history
…h-datanode

hierarchical datamodel
  • Loading branch information
frehburg authored Oct 14, 2024
2 parents df5ff8a + 7f65e2f commit f7471b4
Show file tree
Hide file tree
Showing 9 changed files with 683 additions and 53 deletions.
318 changes: 308 additions & 10 deletions notebooks/erdri_cds_definition_in_code.ipynb

Large diffs are not rendered by default.

196 changes: 196 additions & 0 deletions notebooks/hierarchical_data_model.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-10-14T21:21:45.314262Z",
"start_time": "2024-10-14T21:21:45.309299Z"
}
},
"source": [
"from phenopacket_mapper.data_standards import DataField\n",
"from phenopacket_mapper.data_standards import DataModel, ValueSet, DataSection, OrGroup"
],
"outputs": [],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-14T21:21:45.340239Z",
"start_time": "2024-10-14T21:21:45.334880Z"
}
},
"cell_type": "code",
"source": [
"genomic_interpretation = DataModel(\n",
" data_model_name=\"Phenopacket schema Genomic Interpretation\",\n",
" fields=(\n",
" DataField(\n",
" name=\"subject_or_biosample_id\",\n",
" specification=str,\n",
" required=True,\n",
" description=\"The id of the patient or biosample that is the subject being interpreted. REQUIRED.\"\n",
" ),\n",
" \n",
" DataField(\n",
" name=\"interpretation_status\",\n",
" specification=ValueSet(\n",
" name=\"Interpretation Status Value Set\",\n",
" elements=[\"UNKNOWN_STATUS\", \"REJECTED\", \"CANDIDATE\", \"CONTRIBUTORY\", \"CAUSATIVE\"],\n",
" ),\n",
" required=True,\n",
" description=\"status of the interpretation. REQUIRED.\",\n",
" ),\n",
" \n",
" OrGroup(\n",
" name=\"call\",\n",
" fields=(\n",
" DataSection(\n",
" name=\"GeneDescriptor\",\n",
" fields=(\n",
" DataField(\n",
" name=\"value_id\",\n",
" specification=str,\n",
" required=True,\n",
" description=\"Official identifier of the gene. REQUIRED.\"\n",
" ),\n",
"\n",
" DataField(\n",
" name=\"symbol\",\n",
" specification=str,\n",
" required=True,\n",
" description=\"Official gene symbol. REQUIRED.\"\n",
" ),\n",
"\n",
" DataField(\n",
" name=\"description\",\n",
" specification=str,\n",
" required=False,\n",
" description=\"A free-text description of the gene\"\n",
" ),\n",
" ),\n",
" ),\n",
" ),\n",
" ),\n",
" )\n",
")"
],
"id": "2e979683ae450d9b",
"outputs": [],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-14T21:21:45.352293Z",
"start_time": "2024-10-14T21:21:45.347715Z"
}
},
"cell_type": "code",
"source": [
"s = str(genomic_interpretation)\n",
"\n",
"print(s)"
],
"id": "35a697d8b9b8236d",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"DataModel(\n",
"\tname: Phenopacket schema Genomic Interpretation\n",
"\tDataField(\n",
"\t\tid: subject_or_biosample_id,\n",
"\t\tname: subject_or_biosample_id,\n",
"\t\trequired: True\n",
"\t\tspecification: ValueSet(elements=[<class 'str'>], name='', description='')\n",
"\t\tcardinality: 1..n\n",
"\t)\n",
"\tDataField(\n",
"\t\tid: interpretation_status,\n",
"\t\tname: interpretation_status,\n",
"\t\trequired: True\n",
"\t\tspecification: ValueSet(elements=['UNKNOWN_STATUS', 'REJECTED', 'CANDIDATE', 'CONTRIBUTORY', 'CAUSATIVE'], name='Interpretation Status Value Set', description='')\n",
"\t\tcardinality: 1..n\n",
"\t)\n",
"\tOrGroup(\n",
"\t\tid: call,\n",
"\t\tname: call,\n",
"\t\trequired: False\n",
"\t\tcardinality: 0..n\n",
"\tDataSection(\n",
"\t\tid: genedescriptor,\n",
"\t\tname: GeneDescriptor,\n",
"\t\trequired: False\n",
"\t\tcardinality: 0..n\n",
"\tDataField(\n",
"\t\tid: value_id,\n",
"\t\tname: value_id,\n",
"\t\trequired: True\n",
"\t\tspecification: ValueSet(elements=[<class 'str'>], name='', description='')\n",
"\t\tcardinality: 1..n\n",
"\t)\n",
"\tDataField(\n",
"\t\tid: symbol,\n",
"\t\tname: symbol,\n",
"\t\trequired: True\n",
"\t\tspecification: ValueSet(elements=[<class 'str'>], name='', description='')\n",
"\t\tcardinality: 1..n\n",
"\t)\n",
"\tDataField(\n",
"\t\tid: description,\n",
"\t\tname: description,\n",
"\t\trequired: False\n",
"\t\tspecification: ValueSet(elements=[<class 'str'>], name='', description='')\n",
"\t\tcardinality: 0..n\n",
"\t)\n",
"\t)\n",
"\t)\n",
"---\n",
")\n"
]
}
],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-14T21:21:45.378046Z",
"start_time": "2024-10-14T21:21:45.375530Z"
}
},
"cell_type": "code",
"source": "",
"id": "4c78eb05ea58ff6c",
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
42 changes: 32 additions & 10 deletions src/phenopacket_mapper/_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
This package is intended to expose the PhenopacketMapper API to the user.
"""

import abc
from abc import ABCMeta, abstractmethod
from typing import Tuple, Iterable, Iterator
from dataclasses import dataclass

from phenopacket_mapper.data_standards import Cardinality

class DataModelDefiner(metaclass=abc.ABCMeta):

class DataModelDefiner(metaclass=ABCMeta):
"""
Take some data model definition and try to load it into :class:`DataModel`.
Expand All @@ -16,7 +17,7 @@ class DataModelDefiner(metaclass=abc.ABCMeta):
pass


class DataModel(metaclass=abc.ABCMeta):
class DataModel(metaclass=ABCMeta):
"""
Value class.
The fields:
Expand All @@ -29,8 +30,7 @@ class DataModel(metaclass=abc.ABCMeta):
pass


@dataclass
class DataNode(metaclass=abc.ABCMeta):
class DataNode(metaclass=ABCMeta):
"""
This is very much like Jackson (Java) `TreeNode`,
because it can be many things.
Expand All @@ -41,16 +41,38 @@ class DataNode(metaclass=abc.ABCMeta):
We want to be able to (de)serialize this.
"""
label: str
id: str
required: bool
@property
@abstractmethod
def name(self) -> str:
pass

@property
@abstractmethod
def id(self) -> str:
pass

@property
@abstractmethod
def required(self) -> bool:
pass


@property
@abstractmethod
def description(self) -> str:
pass

@property
@abstractmethod
def cardinality(self) -> Cardinality:
pass


class DataInstance:
    """Placeholder class: the body is empty, so it declares no fields or methods of its own."""
    pass


class Transformation(metaclass=abc.ABCMeta):
class Transformation(metaclass=ABCMeta):
"""
"""
Expand Down
9 changes: 5 additions & 4 deletions src/phenopacket_mapper/data_standards/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
"""This submodule defines the data standards used in the project."""

from .cardinality import Cardinality
from .date import Date
from .code_system import CodeSystem, SNOMED_CT, HPO, MONDO, OMIM, ORDO, LOINC
from .code import Coding, CodeableConcept
from .data_model import DataModel, DataField, DataModelInstance, DataFieldValue, DataSet
from .data_model import DataModel, DataField, DataModelInstance, DataFieldValue, DataSet, DataSection, OrGroup
from . import data_models
from .value_set import ValueSet

__all__ = [
"Cardinality",
"Coding", "CodeableConcept",
"DataModel", "DataField", "DataModelInstance", "DataFieldValue", "DataSet",
"DataModel", "DataField", "DataModelInstance", "DataFieldValue", "DataSet", "DataSection", "OrGroup",
"data_models",
"CodeSystem",
"SNOMED_CT", "HPO", "MONDO", "OMIM", "ORDO", "LOINC",
"Date",
"ValueSet"
"ValueSet",
]
22 changes: 22 additions & 0 deletions src/phenopacket_mapper/data_standards/cardinality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from dataclasses import dataclass, field
from typing import Union, Literal


@dataclass(slots=True, frozen=True)
class Cardinality:
min: int = field(default=0)
max: Union[int, Literal['n']] = field(default='n')

def __post_init__(self):
if not isinstance(self.min, int):
raise ValueError(f"Parameter min must be of type integer. (Not: {type(self.min)})")
elif self.min < 0:
raise ValueError(f"Parameter min must be a non-negative integer. (Not: {self.min})")
if not (isinstance(self.max, int) or self.max == 'n'):
raise ValueError(f"Parameter max must be of type or equal to the literal 'n'. "
f"(Not: {self.min} ({type(self.min)}))")
elif self.max != 'n' and self.max < 1: # has to be an integer
raise ValueError(f"Parameter max must be a positive integer. (Not: {self.min})")

def __str__(self):
return f"{self.min}..{self.max}"
Loading

0 comments on commit f7471b4

Please sign in to comment.