Skip to content

Commit

Permalink
Moved enrichment method check from init to enrich_rdf_graph (#62)
Browse files Browse the repository at this point in the history
  • Loading branch information
ubyndr authored Mar 14, 2024
1 parent 5895bb8 commit f8d1d06
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 56 deletions.
42 changes: 21 additions & 21 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,26 +68,6 @@ def __init__(
if keys
else enrichment_analyzer.analyzer_manager.report_df
)
if self.ea.enricher_manager.enricher.enriched_df.empty:
# TODO or we can just call simple_enrichment method
enrichment_methods = [i for i in dir(AnndataEnricher) if "_enrichment" in i]
enrichment_methods.sort()
raise MissingEnrichmentProcess(enrichment_methods)
self.cell_type_dict = (
pd.concat(
[
self.ea.enricher_manager.enricher.enriched_df[["s", "s_label"]],
self.ea.enricher_manager.enricher.enriched_df[["o", "o_label"]].rename(
columns={"o": "s", "o_label": "s_label"}
),
],
axis=0,
ignore_index=True,
)
.drop_duplicates()
.set_index("s")["s_label"]
.to_dict()
)
self.ns = Namespace("http://example.org/")
self.graph = Graph()
self.label_priority = None
Expand Down Expand Up @@ -173,11 +153,31 @@ def enrich_rdf_graph(self):
Returns:
"""
if self.ea.enricher_manager.enricher.enriched_df.empty:
# TODO or we can just call simple_enrichment method
enrichment_methods = [i for i in dir(AnndataEnricher) if "_enrichment" in i]
enrichment_methods.sort()
raise MissingEnrichmentProcess(enrichment_methods)
cell_type_dict = (
pd.concat(
[
self.ea.enricher_manager.enricher.enriched_df[["s", "s_label"]],
self.ea.enricher_manager.enricher.enriched_df[["o", "o_label"]].rename(
columns={"o": "s", "o_label": "s_label"}
),
],
axis=0,
ignore_index=True,
)
.drop_duplicates()
.set_index("s")["s_label"]
.to_dict()
)
# add cell_type nodes and consists_of relations
cl_namespace = Namespace("http://purl.obolibrary.org/obo/CL_")
consist_of = URIRef(CONSIST_OF.get("iri"))
self.graph.add((consist_of, RDFS.label, Literal(CONSIST_OF.get("label"))))
for curie, label in self.cell_type_dict.items():
for curie, label in cell_type_dict.items():
resource = cl_namespace[curie.split(":")[-1]]
self.graph.add((resource, RDFS.label, Literal(label)))
self.graph.add((resource, RDF.type, OWL.Class))
Expand Down
56 changes: 21 additions & 35 deletions test/graph_generator/test_graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,26 +54,6 @@ def graph_generator_instance_for_kidney(enrichment_analyzer_instance_for_kidney_
return GraphGenerator(ea)


def test_graph_generator_init_missing_enrichment_process(
    enrichment_analyzer_instance_for_immune_data,
):
    """Check that building a GraphGenerator raises MissingEnrichmentProcess.

    Only the co-annotation report is produced on the analyzer before
    construction; the error message is expected to list the enrichment
    methods of AnndataEnricher that must be run first.
    """
    analyzer = enrichment_analyzer_instance_for_immune_data
    analyzer.co_annotation_report()

    # Build the expected text up front so the assertions below stay compact.
    expected_message = (
        "Any of the following enrichment methods from AnndataEnricher must be used first; "
        "contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
        "simple_enrichment"
    )

    with pytest.raises(MissingEnrichmentProcess) as exc_info:
        GraphGenerator(analyzer)

    raised = exc_info.value
    assert isinstance(raised, MissingEnrichmentProcess)
    assert raised.args[0] == expected_message


def test_graph_generator_init_missing_analysis_process(
enrichment_analyzer_instance_for_immune_data,
):
Expand Down Expand Up @@ -102,19 +82,6 @@ def test_graph_generator_init_with_valid_input(enrichment_analyzer_instance_for_

assert graph_generator.ea == ea
assert graph_generator.df.equals(ea.analyzer_manager.report_df)
assert graph_generator.cell_type_dict == {
"CL:0000798": "gamma-delta T cell",
"CL:0000809": "double-positive, alpha-beta thymocyte",
"CL:0000813": "memory T cell",
"CL:0000815": "regulatory T cell",
"CL:0000895": "naive thymus-derived CD4-positive, alpha-beta T cell",
"CL:0000897": "CD4-positive, alpha-beta memory T cell",
"CL:0000900": "naive thymus-derived CD8-positive, alpha-beta T cell",
"CL:0000909": "CD8-positive, alpha-beta memory T cell",
"CL:0000940": "mucosal invariant T cell",
"CL:0002489": "double negative thymocyte",
"CL:0000084": "T cell",
}
assert graph_generator.ns == Namespace("http://example.org/")
assert graph_generator.graph is not None
assert graph_generator.label_priority is None
Expand Down Expand Up @@ -161,6 +128,25 @@ def test_generate_rdf_graph(graph_generator_instance_for_kidney):
assert len(graph_generator.graph) == 1


def test_enrich_graph_missing_enrichment_process(enrichment_analyzer_instance_for_kidney_data):
    """Check that enrich_rdf_graph raises MissingEnrichmentProcess.

    The GraphGenerator is constructed after only the co-annotation report
    has been produced; calling enrich_rdf_graph is then expected to fail
    with a message listing the AnndataEnricher enrichment methods.
    """
    analyzer = enrichment_analyzer_instance_for_kidney_data
    analyzer.co_annotation_report()
    generator = GraphGenerator(analyzer)

    # Build the expected text up front so the assertions below stay compact.
    expected_message = (
        "Any of the following enrichment methods from AnndataEnricher must be used first; "
        "contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
        "simple_enrichment"
    )

    with pytest.raises(MissingEnrichmentProcess) as exc_info:
        generator.enrich_rdf_graph()

    raised = exc_info.value
    assert isinstance(raised, MissingEnrichmentProcess)
    assert raised.args[0] == expected_message


def test_enrich_rdf_graph(graph_generator_instance_for_kidney):
graph_generator = graph_generator_instance_for_kidney
graph_generator.generate_rdf_graph()
Expand All @@ -169,7 +155,7 @@ def test_enrich_rdf_graph(graph_generator_instance_for_kidney):

graph_generator.enrich_rdf_graph()

assert len(graph_generator.graph) == 903
assert len(graph_generator.graph) == 1013
assert (
URIRef(CONSIST_OF.get("iri")),
RDFS.label,
Expand All @@ -186,7 +172,7 @@ def test_enrich_rdf_graph(graph_generator_instance_for_kidney):
if str(s).startswith("http://purl.obolibrary.org/obo/CL_")
]
)
== 417
== 527
)


Expand Down

0 comments on commit f8d1d06

Please sign in to comment.