Changes for MVP-4632 (#56)

* Made changes for MVP-4632
* Updated walkthrough.ipynb
INCATools · Nov 1, 2023 · cabb406 · cabb406
1 parent c4e3137
commit cabb406
Show file tree

Hide file tree

Showing 9 changed files with 933 additions and 853 deletions.
diff --git a/README.md b/README.md
@@ -71,6 +71,10 @@ More examples and detailed explanation can be found in jupyter notebook given in
 
 https://github.com/INCATools/pandasaurus_cxg/blob/main/walkthrough.ipynb
 
+## Library Documentation
+
+https://incatools.github.io/pandasaurus_cxg/
+
 ## Roadmap
 
 https://github.com/INCATools/pandasaurus_cxg/blob/main/ROADMAP.md
diff --git a/pandasaurus_cxg/anndata_analyzer.py b/pandasaurus_cxg/anndata_analyzer.py
@@ -174,7 +174,7 @@ def enriched_co_annotation_report(self, disease: Optional[str] = None):
     def _enrich_co_annotation(enricher: AnndataEnricher):
         enriched_df = enricher.enricher.enriched_df
         if enriched_df.empty:
-            return pd.DataFrame()
+            return enriched_df
         return enriched_df[enriched_df["o"].isin(enricher.seed_list)][["s_label", "o_label"]]
 
     def _filter_data_and_drop_duplicates(self, field_name_1, field_name_2, disease):

diff --git a/pandasaurus_cxg/graph_generator/graph_generator.py b/pandasaurus_cxg/graph_generator/graph_generator.py
@@ -10,6 +10,7 @@
 from rdflib.plugins.sparql import prepareQuery
 
 from pandasaurus_cxg.enrichment_analysis import (
+    AnndataAnalyzer,
     AnndataEnricher,
     AnndataEnrichmentAnalyzer,
 )
@@ -29,6 +30,7 @@
 )
 from pandasaurus_cxg.utils.exceptions import (
     InvalidGraphFormat,
+    MissingAnalysisProcess,
     MissingEnrichmentProcess,
 )
 from pandasaurus_cxg.utils.logging_config import configure_logger
@@ -55,6 +57,10 @@ def __init__(
         """
         # TODO need to think about how to handle the requirement of enrichment and co_annotation_analysis methods
         self.ea = enrichment_analyzer
+        if self.ea.analyzer_manager.report_df.empty:
+            analysis_methods = [i for i in dir(AnndataAnalyzer) if "_report" in i]
+            analysis_methods.sort()
+            raise MissingAnalysisProcess(analysis_methods)
         # TODO need to handle invalid keys. We also need to discuss about keeping the keys param. DO NOT USE
         self.df = (
             enrichment_analyzer.analyzer_manager.report_df[keys]

diff --git a/pandasaurus_cxg/utils/exceptions.py b/pandasaurus_cxg/utils/exceptions.py
@@ -32,13 +32,21 @@ def __init__(self, missing_cell_types: List[str], cell_type_list: List[str]):
 class MissingEnrichmentProcess(Exception):
     def __init__(self, enrichment_methods: List[str]):
         self.message = (
-            f"Any of the following enrichment methods from AnndataEnricher must be used before "
-            f"using enriched_rdf_graph method: "
+            f"Any of the following enrichment methods from AnndataEnricher must be used first; "
             f"{', '.join(enrichment_methods)}"
         )
         super().__init__(self.message)
 
 
+class MissingAnalysisProcess(Exception):
+    def __init__(self, analysis_methods: List[str]):
+        self.message = (
+            f"Any of the following analysis methods from AnndataAnalyser must be used first; "
+            f"{', '.join(analysis_methods)}"
+        )
+        super().__init__(self.message)
+
+
 class SubclassWarning(Exception):
     def __init__(self, relation: List[Tuple[str, str]]):
         joined_relations = ", ".join(["-".join(rel) for rel in relation])

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pandasaurus-cxg"
-version = "0.1.5"
+version = "0.1.6"
 description = "Ontology enrichment tool for CxG standard AnnData files."
 authors = ["Ugur Bayindir <ugur@ebi.ac.uk>"]
 license = "http://www.apache.org/licenses/LICENSE-2.0"
@@ -12,9 +12,9 @@ python = "^3.9"
 pandas = "^2.0.2"
 anndata = "^0.9.1"
 rdflib = "^6.3.2"
-oaklib = "^0.5.13"
+oaklib = "0.5.13"
 matplotlib = "^3.7.2"
-pandasaurus = "^0.3.5"
+pandasaurus = "^0.3.6"
 pygraphviz = "^1.11"
 sphinx = { version = "^7.2.6", optional = true }
 sphinx-rtd-theme = { version = "^1.3.0", optional = true }

diff --git a/test/graph_generator/test_graph_generator.py b/test/graph_generator/test_graph_generator.py
@@ -9,6 +9,7 @@
 from pandasaurus_cxg.graph_generator.graph_predicates import CONSIST_OF
 from pandasaurus_cxg.utils.exceptions import (
     InvalidGraphFormat,
+    MissingAnalysisProcess,
     MissingEnrichmentProcess,
 )
 
@@ -53,33 +54,45 @@ def graph_generator_instance_for_kidney(enrichment_analyzer_instance_for_kidney_
     return GraphGenerator(ea)
 
 
-def test_graph_generator_init(enrichment_analyzer_instance_for_immune_data):
+def test_graph_generator_init_missing_enrichment_process(enrichment_analyzer_instance_for_immune_data):
     ea = enrichment_analyzer_instance_for_immune_data
+    ea.co_annotation_report()
+
     with pytest.raises(MissingEnrichmentProcess) as exc_info:
         GraphGenerator(ea)
 
     exception = exc_info.value
     expected_message = (
-        "Any of the following enrichment methods from AnndataEnricher must be used before using "
-        "enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
-        "minimal_slim_enrichment, simple_enrichment"
+        "Any of the following enrichment methods from AnndataEnricher must be used first; "
+        "contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
+        "simple_enrichment"
     )
 
     assert isinstance(exception, MissingEnrichmentProcess)
     assert exception.args[0] == expected_message
 
+
+def test_graph_generator_init_missing_analysis_process(enrichment_analyzer_instance_for_immune_data):
+    ea = enrichment_analyzer_instance_for_immune_data
     ea.enricher_manager.simple_enrichment()
-    ea.co_annotation_report()
 
-    # keys = ["author_cell_type", "cell_type"]
-    # graph_generator = GraphGenerator(ea, keys)
-    #
-    # assert graph_generator.ea == ea
-    # assert graph_generator.df.equals(ea.analyzer_manager.report_df[keys])
-    # assert graph_generator.cell_type_dict == {}
-    # assert graph_generator.ns == Namespace("http://example.org/")
-    # assert graph_generator.graph is not None
-    # assert graph_generator.label_priority is None
+    with pytest.raises(MissingAnalysisProcess) as exc_info:
+        GraphGenerator(ea)
+
+    exception = exc_info.value
+    expected_message = (
+        "Any of the following analysis methods from AnndataAnalyser must be used first; "
+        "co_annotation_report, enriched_co_annotation_report"
+    )
+
+    assert isinstance(exception, MissingAnalysisProcess)
+    assert exception.args[0] == expected_message
+
+
+def test_graph_generator_init_with_valid_input(enrichment_analyzer_instance_for_immune_data):
+    ea = enrichment_analyzer_instance_for_immune_data
+    ea.enricher_manager.simple_enrichment()
+    ea.co_annotation_report()
 
     graph_generator = GraphGenerator(ea)
 

diff --git a/test/test_anndata_enricher.py b/test/test_anndata_enricher.py
@@ -268,9 +268,9 @@ def test_filter_anndata_with_enriched_cell_type(sample_immune_data):
 
     exception = exc_info.value
     expected_message = (
-        "Any of the following enrichment methods from AnndataEnricher must be used before using "
-        "enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
-        "minimal_slim_enrichment, simple_enrichment"
+        "Any of the following enrichment methods from AnndataEnricher must be used first; "
+        "contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
+        "simple_enrichment"
     )
 
     assert isinstance(exception, MissingEnrichmentProcess)
@@ -307,9 +307,9 @@ def test_annotate_anndata_with_cell_type(sample_immune_data):
 
     exception = exc_info.value
     expected_message = (
-        "Any of the following enrichment methods from AnndataEnricher must be used before using "
-        "enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
-        "minimal_slim_enrichment, simple_enrichment"
+        "Any of the following enrichment methods from AnndataEnricher must be used first; "
+        "contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
+        "simple_enrichment"
     )
 
     assert isinstance(exception, MissingEnrichmentProcess)
@@ -369,10 +369,10 @@ def test_annotate_anndata_with_cell_type(sample_immune_data):
 def test_set_enricher_property_list(sample_immune_data):
     enricher = AnndataEnricher(sample_immune_data)
 
-    assert enricher.enricher._Query__enrichment_property_list == ["rdfs:subClassOf"]
+    assert enricher.enricher._enrichment_property_list == ["rdfs:subClassOf"]
 
     enricher.set_enricher_property_list(["rdfs:subClassOf", "BFO:0000050"])
-    assert enricher.enricher._Query__enrichment_property_list == ["rdfs:subClassOf", "BFO:0000050"]
+    assert enricher.enricher._enrichment_property_list == ["rdfs:subClassOf", "BFO:0000050"]
 
 
 def test_validate_slim_list(mocker, sample_immune_data, slim_data):
@@ -442,9 +442,9 @@ def test_check_subclass_relationships(sample_immune_data):
 
     exception = exc_info.value
     expected_message = (
-        "Any of the following enrichment methods from AnndataEnricher must be used before using "
-        "enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
-        "minimal_slim_enrichment, simple_enrichment"
+        "Any of the following enrichment methods from AnndataEnricher must be used first; "
+        "contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
+        "simple_enrichment"
     )
 
     assert isinstance(exception, MissingEnrichmentProcess)