Skip to content

Commit

Permalink
Refactored CURIE generation logic for annotated_target (#76)
Browse files Browse the repository at this point in the history
* Refactored annotated_target CURIE generation

* Updated version to 0.1.18
  • Loading branch information
ubyndr authored Jul 1, 2024
1 parent 63362a5 commit 848ca96
Show file tree
Hide file tree
Showing 4 changed files with 536 additions and 490 deletions.
28 changes: 19 additions & 9 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
HAS_SOURCE,
SUBCLUSTER_OF,
)
from pandasaurus_cxg.graph_generator.graph_namespaces import prefixes
from pandasaurus_cxg.utils.exceptions import (
InvalidGraphFormat,
MissingAnalysisProcess,
Expand Down Expand Up @@ -141,7 +142,9 @@ def generate_rdf_graph(self):
)
)

self.graph.add((dataset_class, URIRef(self.ns[remove_special_characters(key)]), Literal(value)))
self.graph.add(
(dataset_class, URIRef(self.ns[remove_special_characters(key)]), Literal(value))
)
has_source = URIRef(HAS_SOURCE["iri"])
self.graph.add((has_source, RDFS.label, Literal(HAS_SOURCE["label"])))

Expand Down Expand Up @@ -173,7 +176,7 @@ def generate_rdf_graph(self):
self.graph = graphgen.apply_transitive_reduction(self.graph, [subcluster.toPython()])

# add cell_type nodes and consists_of relations
cl_namespace = Namespace("http://purl.obolibrary.org/obo/CL_")
cl_namespace = Namespace(prefixes.get("CL"))
consist_of = URIRef(CONSIST_OF.get("iri"))
self.graph.add((consist_of, RDFS.label, Literal(CONSIST_OF.get("label"))))
for curie, label in self.ea.enricher_manager.seed_dict.items():
Expand Down Expand Up @@ -243,14 +246,21 @@ def add_metadata_nodes(self, metadata_fields: List[str]):
obs[obs[a_cell_type] == str(literal)][metadata].value_counts(normalize=True)
* 100
).loc[lambda x: x != 0.0]

# Extract the ontology term ID mapping
ontology_term_id_mapping = (
obs[[metadata, f"{metadata}_ontology_term_id"]]
.drop_duplicates()
.set_index(metadata)
.to_dict()[f"{metadata}_ontology_term_id"]
)

for label, percentage in percentages.items():
annotated_target = self.graph.value(
predicate=RDFS.label, object=Literal(label)
)
if annotated_target is None:
annotated_target = URIRef(self.ns[str(uuid.uuid4())])
self.graph.add((annotated_target, RDF.type, self.ns[metadata]))
self.graph.add((annotated_target, RDFS.label, Literal(label)))
ontology_term_id = ontology_term_id_mapping.get(label).split(":")
annotated_target = Namespace(prefixes.get(ontology_term_id[0]))[
ontology_term_id[-1]
]
self.graph.add((annotated_target, RDFS.label, Literal(label)))
bnode_axiom = BNode()
self.graph.add((bnode_axiom, RDF.type, OWL.Axiom))
self.graph.add((bnode_axiom, OWL.annotatedSource, s))
Expand Down
8 changes: 8 additions & 0 deletions pandasaurus_cxg/graph_generator/graph_namespaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Lookup table from a CURIE prefix (the part before the colon in an ontology
# term ID) to the IRI namespace that the local ID is appended to.
# Keys are case-sensitive and are matched exactly as written here.
prefixes = dict(
    (
        ("CL", "http://purl.obolibrary.org/obo/CL_"),
        ("UBERON", "http://purl.obolibrary.org/obo/UBERON_"),
        ("PATO", "http://purl.obolibrary.org/obo/PATO_"),
        ("MONDO", "http://purl.obolibrary.org/obo/MONDO_"),
        ("HsapDv", "http://purl.obolibrary.org/obo/HsapDv_"),
        ("NCBITaxon", "http://purl.obolibrary.org/obo/NCBITaxon_"),
    )
)
Loading

0 comments on commit 848ca96

Please sign in to comment.