Skip to content

Commit

Permalink
Changes for MVP-4632 (#56)
Browse files Browse the repository at this point in the history
* Made changes for MVP-4632

* Updated walkthrough.ipynb
  • Loading branch information
Ismail Ugur Bayindir authored Nov 1, 2023
1 parent c4e3137 commit cabb406
Show file tree
Hide file tree
Showing 9 changed files with 933 additions and 853 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ More examples and detailed explanation can be found in jupyter notebook given in

https://github.com/INCATools/pandasaurus_cxg/blob/main/walkthrough.ipynb

## Library Documentation

https://incatools.github.io/pandasaurus_cxg/

## Roadmap

https://github.com/INCATools/pandasaurus_cxg/blob/main/ROADMAP.md
2 changes: 1 addition & 1 deletion pandasaurus_cxg/anndata_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def enriched_co_annotation_report(self, disease: Optional[str] = None):
def _enrich_co_annotation(enricher: AnndataEnricher):
    """Return the label pairs of enrichment rows whose object term is a seed.

    Args:
        enricher: Object exposing ``enricher.enriched_df`` (the enrichment
            table) and ``seed_list`` (the values accepted in column ``o``).

    Returns:
        The ``s_label`` and ``o_label`` columns of the rows whose ``o`` value
        is in ``enricher.seed_list``. An empty enrichment table is returned
        as-is.
    """
    enriched_df = enricher.enricher.enriched_df
    if enriched_df.empty:
        # Hand back the empty table itself instead of a fresh pd.DataFrame();
        # the superseded return made this line unreachable.
        return enriched_df
    return enriched_df[enriched_df["o"].isin(enricher.seed_list)][["s_label", "o_label"]]

def _filter_data_and_drop_duplicates(self, field_name_1, field_name_2, disease):
Expand Down
6 changes: 6 additions & 0 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from rdflib.plugins.sparql import prepareQuery

from pandasaurus_cxg.enrichment_analysis import (
AnndataAnalyzer,
AnndataEnricher,
AnndataEnrichmentAnalyzer,
)
Expand All @@ -29,6 +30,7 @@
)
from pandasaurus_cxg.utils.exceptions import (
InvalidGraphFormat,
MissingAnalysisProcess,
MissingEnrichmentProcess,
)
from pandasaurus_cxg.utils.logging_config import configure_logger
Expand All @@ -55,6 +57,10 @@ def __init__(
"""
# TODO need to think about how to handle the requirement of enrichment and co_annotation_analysis methods
self.ea = enrichment_analyzer
if self.ea.analyzer_manager.report_df.empty:
analysis_methods = [i for i in dir(AnndataAnalyzer) if "_report" in i]
analysis_methods.sort()
raise MissingAnalysisProcess(analysis_methods)
# TODO need to handle invalid keys. We also need to discuss whether to keep the keys param. DO NOT USE
self.df = (
enrichment_analyzer.analyzer_manager.report_df[keys]
Expand Down
12 changes: 10 additions & 2 deletions pandasaurus_cxg/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,21 @@ def __init__(self, missing_cell_types: List[str], cell_type_list: List[str]):
class MissingEnrichmentProcess(Exception):
    """Error listing the AnndataEnricher enrichment methods that must run first.

    Args:
        enrichment_methods: Names of the enrichment methods; they are joined
            with ", " and appended to the message.

    Attributes are limited to ``message``, which holds the same text passed to
    ``Exception.__init__`` (and therefore available as ``args[0]``).
    """

    def __init__(self, enrichment_methods: List[str]):
        # Only one lead-in sentence is kept: the superseded
        # "... must be used before using enriched_rdf_graph method: " prefix
        # is dropped so the message is not doubled.
        self.message = (
            "Any of the following enrichment methods from AnndataEnricher must be used first; "
            f"{', '.join(enrichment_methods)}"
        )
        super().__init__(self.message)


class MissingAnalysisProcess(Exception):
    """Error listing the analysis methods, one of which must be run first.

    Args:
        analysis_methods: Names of the analysis methods; they are joined with
            ", " and appended to the message.

    The composed text is stored on ``message`` and passed to
    ``Exception.__init__``, so it is also available as ``args[0]``.
    """

    def __init__(self, analysis_methods: List[str]):
        method_names = ", ".join(analysis_methods)
        # NOTE(review): "AnndataAnalyser" differs from the class name
        # AnndataAnalyzer; the text is kept verbatim because the tests assert
        # on this exact message.
        lead_in = "Any of the following analysis methods from AnndataAnalyser must be used first; "
        self.message = lead_in + method_names
        super().__init__(self.message)


class SubclassWarning(Exception):
def __init__(self, relation: List[Tuple[str, str]]):
joined_relations = ", ".join(["-".join(rel) for rel in relation])
Expand Down
1,343 changes: 693 additions & 650 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandasaurus-cxg"
version = "0.1.5"
version = "0.1.6"
description = "Ontology enrichment tool for CxG standard AnnData files."
authors = ["Ugur Bayindir <ugur@ebi.ac.uk>"]
license = "http://www.apache.org/licenses/LICENSE-2.0"
Expand All @@ -12,9 +12,9 @@ python = "^3.9"
pandas = "^2.0.2"
anndata = "^0.9.1"
rdflib = "^6.3.2"
oaklib = "^0.5.13"
oaklib = "0.5.13"
matplotlib = "^3.7.2"
pandasaurus = "^0.3.5"
pandasaurus = "^0.3.6"
pygraphviz = "^1.11"
sphinx = { version = "^7.2.6", optional = true }
sphinx-rtd-theme = { version = "^1.3.0", optional = true }
Expand Down
41 changes: 27 additions & 14 deletions test/graph_generator/test_graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandasaurus_cxg.graph_generator.graph_predicates import CONSIST_OF
from pandasaurus_cxg.utils.exceptions import (
InvalidGraphFormat,
MissingAnalysisProcess,
MissingEnrichmentProcess,
)

Expand Down Expand Up @@ -53,33 +54,45 @@ def graph_generator_instance_for_kidney(enrichment_analyzer_instance_for_kidney_
return GraphGenerator(ea)


def test_graph_generator_init(enrichment_analyzer_instance_for_immune_data):
def test_graph_generator_init_missing_enrichment_process(enrichment_analyzer_instance_for_immune_data):
ea = enrichment_analyzer_instance_for_immune_data
ea.co_annotation_report()

with pytest.raises(MissingEnrichmentProcess) as exc_info:
GraphGenerator(ea)

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
assert exception.args[0] == expected_message


def test_graph_generator_init_missing_analysis_process(enrichment_analyzer_instance_for_immune_data):
ea = enrichment_analyzer_instance_for_immune_data
ea.enricher_manager.simple_enrichment()
ea.co_annotation_report()

# keys = ["author_cell_type", "cell_type"]
# graph_generator = GraphGenerator(ea, keys)
#
# assert graph_generator.ea == ea
# assert graph_generator.df.equals(ea.analyzer_manager.report_df[keys])
# assert graph_generator.cell_type_dict == {}
# assert graph_generator.ns == Namespace("http://example.org/")
# assert graph_generator.graph is not None
# assert graph_generator.label_priority is None
with pytest.raises(MissingAnalysisProcess) as exc_info:
GraphGenerator(ea)

exception = exc_info.value
expected_message = (
"Any of the following analysis methods from AnndataAnalyser must be used first; "
"co_annotation_report, enriched_co_annotation_report"
)

assert isinstance(exception, MissingAnalysisProcess)
assert exception.args[0] == expected_message


def test_graph_generator_init_with_valid_input(enrichment_analyzer_instance_for_immune_data):
ea = enrichment_analyzer_instance_for_immune_data
ea.enricher_manager.simple_enrichment()
ea.co_annotation_report()

graph_generator = GraphGenerator(ea)

Expand Down
22 changes: 11 additions & 11 deletions test/test_anndata_enricher.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ def test_filter_anndata_with_enriched_cell_type(sample_immune_data):

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
Expand Down Expand Up @@ -307,9 +307,9 @@ def test_annotate_anndata_with_cell_type(sample_immune_data):

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
Expand Down Expand Up @@ -369,10 +369,10 @@ def test_annotate_anndata_with_cell_type(sample_immune_data):
def test_set_enricher_property_list(sample_immune_data):
    """Check that set_enricher_property_list replaces the enricher's property list."""
    enricher = AnndataEnricher(sample_immune_data)

    # The list is read from `_enrichment_property_list`; the superseded
    # assertions on `_Query__enrichment_property_list` are dropped.
    # NOTE(review): presumably the attribute was renamed in the pandasaurus
    # release this change targets -- confirm against the installed version.
    assert enricher.enricher._enrichment_property_list == ["rdfs:subClassOf"]

    enricher.set_enricher_property_list(["rdfs:subClassOf", "BFO:0000050"])
    assert enricher.enricher._enrichment_property_list == ["rdfs:subClassOf", "BFO:0000050"]


def test_validate_slim_list(mocker, sample_immune_data, slim_data):
Expand Down Expand Up @@ -442,9 +442,9 @@ def test_check_subclass_relationships(sample_immune_data):

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
Expand Down
Loading

0 comments on commit cabb406

Please sign in to comment.