Skip to content

Commit

Permalink
Dataset object populated with properties from h5ad uns (#70)
Browse files Browse the repository at this point in the history
* Updated version to 0.1.13

* Added dataset object to rdf representation
  • Loading branch information
ubyndr authored Apr 18, 2024
1 parent df6ac5e commit e774404
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
17 changes: 15 additions & 2 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from pandasaurus_cxg.graph_generator.graph_predicates import (
CLUSTER,
CONSIST_OF,
DATASET,
HAS_SOURCE,
SUBCLUSTER_OF,
)
from pandasaurus_cxg.utils.exceptions import (
Expand Down Expand Up @@ -119,21 +121,32 @@ def generate_rdf_graph(self):
if temp_dict not in grouped_dict_uuid.values():
grouped_dict_uuid[str(uuid.uuid4())] = temp_dict

# generate dataset entity and has_source property
dataset_class = URIRef(self.ns[str(uuid.uuid4())])
self.graph.add((dataset_class, RDF.type, URIRef(DATASET.get("iri"))))
self.graph.add((dataset_class, RDFS.label, Literal(DATASET.get("label"))))
uns = self.ea.enricher_manager.anndata.uns
for key, value in uns.items():
if not isinstance(value, str):
continue
self.graph.add((dataset_class, URIRef(self.ns[key]), Literal(value)))
has_source = URIRef(HAS_SOURCE["iri"])
self.graph.add((has_source, RDFS.label, Literal(HAS_SOURCE["label"])))

# generate a resource for each free-text cell_type annotation and cell_type_ontology_term annotation
# cell_set_class = self.ns["CellSet"]
cell_set_class = URIRef(CLUSTER.get("iri"))
self.graph.add((cell_set_class, RDF.type, OWL.Class))
self.graph.add((cell_set_class, RDFS.label, Literal(CLUSTER.get("label"))))
for _uuid, inner_dict in grouped_dict_uuid.items():
resource = self.ns[_uuid]
self.graph.add((resource, RDF.type, cell_set_class))
self.graph.add((resource, has_source, dataset_class))
for k, v in inner_dict.items():
if k == "subcluster_of":
continue
self.graph.add((resource, self.ns[remove_special_characters(k)], Literal(v)))

# add relationship between each resource based on their predicate in the co_annotation_report
# subcluster = self.ns["subcluster_of"]
subcluster = URIRef(SUBCLUSTER_OF.get("iri"))
self.graph.add((subcluster, RDFS.label, Literal(SUBCLUSTER_OF.get("label"))))
self.graph.add((subcluster, RDF.type, OWL.ObjectProperty))
Expand Down
4 changes: 4 additions & 0 deletions pandasaurus_cxg/graph_generator/graph_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@
# Each constant below pairs the IRI of an RDF term ("iri") with the
# human-readable text ("label") that the graph generator attaches to that
# term via rdfs:label.

# Predicate for subcluster relationships between cell-set resources; the graph
# generator declares this IRI as an owl:ObjectProperty.
SUBCLUSTER_OF = {"iri": "http://purl.obolibrary.org/obo/RO_0015003", "label": "subcluster of"}

# Class of the cell-set resources: the graph generator declares this IRI as an
# owl:Class and uses it as the rdf:type of every generated cell-set resource.
CLUSTER = {"iri": "http://purl.obolibrary.org/obo/PCL_0010001", "label": "cell cluster"}

# rdf:type of the single dataset entity the graph generator creates; string
# values from the AnnData `uns` mapping are attached to that entity.
DATASET = {"iri": "https://schema.org/Dataset", "label": "dataset"}

# Predicate linking each cell-set resource to the dataset entity.
HAS_SOURCE = {"iri": "http://purl.org/dc/terms/source", "label": "has_source"}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandasaurus-cxg"
version = "0.1.12"
version = "0.1.13"
description = "Ontology enrichment tool for CxG standard AnnData files."
authors = ["Ugur Bayindir <ugur@ebi.ac.uk>"]
license = "http://www.apache.org/licenses/LICENSE-2.0"
Expand Down
10 changes: 5 additions & 5 deletions test/graph_generator/test_graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ def test_graph_generator_init_with_valid_input(enrichment_analyzer_instance_for_
def test_generate_rdf_graph(graph_generator_instance_for_kidney):
graph_generator = graph_generator_instance_for_kidney
graph_generator.generate_rdf_graph()
assert len(graph_generator.graph) == 530
assert len(graph_generator.graph) == 629
assert (
len([[s, p, o] for s, p, o in graph_generator.graph.triples((None, RDF.type, None))]) == 143
len([[s, p, o] for s, p, o in graph_generator.graph.triples((None, RDF.type, None))]) == 144
)
assert (
len([[s, p, o] for s, p, o in graph_generator.graph.triples((None, RDFS.label, None))])
== 19
== 21
)
assert (
len(
Expand Down Expand Up @@ -152,11 +152,11 @@ def test_enrich_rdf_graph(graph_generator_instance_for_kidney):
graph_generator = graph_generator_instance_for_kidney
graph_generator.generate_rdf_graph()

assert len(graph_generator.graph) == 530
assert len(graph_generator.graph) == 629

graph_generator.enrich_rdf_graph()

assert len(graph_generator.graph) == 1025
assert len(graph_generator.graph) == 1124
assert (
URIRef(CONSIST_OF.get("iri")),
RDFS.label,
Expand Down

0 comments on commit e774404

Please sign in to comment.