Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes for MVP-4632 #56

Merged
merged 2 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ More examples and detailed explanation can be found in jupyter notebook given in

https://github.com/INCATools/pandasaurus_cxg/blob/main/walkthrough.ipynb

## Library Documentation

https://incatools.github.io/pandasaurus_cxg/

## Roadmap

https://github.com/INCATools/pandasaurus_cxg/blob/main/ROADMAP.md
2 changes: 1 addition & 1 deletion pandasaurus_cxg/anndata_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def enriched_co_annotation_report(self, disease: Optional[str] = None):
def _enrich_co_annotation(enricher: AnndataEnricher):
enriched_df = enricher.enricher.enriched_df
if enriched_df.empty:
return pd.DataFrame()
return enriched_df
return enriched_df[enriched_df["o"].isin(enricher.seed_list)][["s_label", "o_label"]]

def _filter_data_and_drop_duplicates(self, field_name_1, field_name_2, disease):
Expand Down
6 changes: 6 additions & 0 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from rdflib.plugins.sparql import prepareQuery

from pandasaurus_cxg.enrichment_analysis import (
AnndataAnalyzer,
AnndataEnricher,
AnndataEnrichmentAnalyzer,
)
Expand All @@ -29,6 +30,7 @@
)
from pandasaurus_cxg.utils.exceptions import (
InvalidGraphFormat,
MissingAnalysisProcess,
MissingEnrichmentProcess,
)
from pandasaurus_cxg.utils.logging_config import configure_logger
Expand All @@ -55,6 +57,10 @@ def __init__(
"""
# TODO need to think about how to handle the requirement of enrichment and co_annotation_analysis methods
self.ea = enrichment_analyzer
if self.ea.analyzer_manager.report_df.empty:
analysis_methods = [i for i in dir(AnndataAnalyzer) if "_report" in i]
analysis_methods.sort()
raise MissingAnalysisProcess(analysis_methods)
# TODO need to handle invalid keys. We also need to discuss whether to keep the keys param. DO NOT USE
self.df = (
enrichment_analyzer.analyzer_manager.report_df[keys]
Expand Down
12 changes: 10 additions & 2 deletions pandasaurus_cxg/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,21 @@ def __init__(self, missing_cell_types: List[str], cell_type_list: List[str]):
class MissingEnrichmentProcess(Exception):
def __init__(self, enrichment_methods: List[str]):
self.message = (
f"Any of the following enrichment methods from AnndataEnricher must be used before "
f"using enriched_rdf_graph method: "
f"Any of the following enrichment methods from AnndataEnricher must be used first; "
f"{', '.join(enrichment_methods)}"
)
super().__init__(self.message)


class MissingAnalysisProcess(Exception):
    """Signal that a required analysis step has not been run yet.

    The exception text lists the analysis methods, any one of which has to
    be called before the failing operation can proceed.
    """

    def __init__(self, analysis_methods: List[str]):
        """Build the error message from the candidate method names.

        Args:
            analysis_methods: Names of the analysis methods to list in the
                message; they are joined with ", " in the given order.
        """
        method_names = ", ".join(analysis_methods)
        # NOTE(review): the text says "AnndataAnalyser" while the class imported
        # elsewhere is spelled "AnndataAnalyzer"; existing tests compare against
        # this exact string, so change both together if the spelling is fixed.
        self.message = (
            "Any of the following analysis methods from AnndataAnalyser must be used first; "
            + method_names
        )
        super().__init__(self.message)


class SubclassWarning(Exception):
def __init__(self, relation: List[Tuple[str, str]]):
joined_relations = ", ".join(["-".join(rel) for rel in relation])
Expand Down
1,343 changes: 693 additions & 650 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandasaurus-cxg"
version = "0.1.5"
version = "0.1.6"
description = "Ontology enrichment tool for CxG standard AnnData files."
authors = ["Ugur Bayindir <ugur@ebi.ac.uk>"]
license = "http://www.apache.org/licenses/LICENSE-2.0"
Expand All @@ -12,9 +12,9 @@ python = "^3.9"
pandas = "^2.0.2"
anndata = "^0.9.1"
rdflib = "^6.3.2"
oaklib = "^0.5.13"
oaklib = "0.5.13"
matplotlib = "^3.7.2"
pandasaurus = "^0.3.5"
pandasaurus = "^0.3.6"
pygraphviz = "^1.11"
sphinx = { version = "^7.2.6", optional = true }
sphinx-rtd-theme = { version = "^1.3.0", optional = true }
Expand Down
41 changes: 27 additions & 14 deletions test/graph_generator/test_graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandasaurus_cxg.graph_generator.graph_predicates import CONSIST_OF
from pandasaurus_cxg.utils.exceptions import (
InvalidGraphFormat,
MissingAnalysisProcess,
MissingEnrichmentProcess,
)

Expand Down Expand Up @@ -53,33 +54,45 @@ def graph_generator_instance_for_kidney(enrichment_analyzer_instance_for_kidney_
return GraphGenerator(ea)


def test_graph_generator_init(enrichment_analyzer_instance_for_immune_data):
def test_graph_generator_init_missing_enrichment_process(enrichment_analyzer_instance_for_immune_data):
ea = enrichment_analyzer_instance_for_immune_data
ea.co_annotation_report()

with pytest.raises(MissingEnrichmentProcess) as exc_info:
GraphGenerator(ea)

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
assert exception.args[0] == expected_message


def test_graph_generator_init_missing_analysis_process(enrichment_analyzer_instance_for_immune_data):
ea = enrichment_analyzer_instance_for_immune_data
ea.enricher_manager.simple_enrichment()
ea.co_annotation_report()

# keys = ["author_cell_type", "cell_type"]
# graph_generator = GraphGenerator(ea, keys)
#
# assert graph_generator.ea == ea
# assert graph_generator.df.equals(ea.analyzer_manager.report_df[keys])
# assert graph_generator.cell_type_dict == {}
# assert graph_generator.ns == Namespace("http://example.org/")
# assert graph_generator.graph is not None
# assert graph_generator.label_priority is None
with pytest.raises(MissingAnalysisProcess) as exc_info:
GraphGenerator(ea)

exception = exc_info.value
expected_message = (
"Any of the following analysis methods from AnndataAnalyser must be used first; "
"co_annotation_report, enriched_co_annotation_report"
)

assert isinstance(exception, MissingAnalysisProcess)
assert exception.args[0] == expected_message


def test_graph_generator_init_with_valid_input(enrichment_analyzer_instance_for_immune_data):
ea = enrichment_analyzer_instance_for_immune_data
ea.enricher_manager.simple_enrichment()
ea.co_annotation_report()

graph_generator = GraphGenerator(ea)

Expand Down
22 changes: 11 additions & 11 deletions test/test_anndata_enricher.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ def test_filter_anndata_with_enriched_cell_type(sample_immune_data):

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
Expand Down Expand Up @@ -307,9 +307,9 @@ def test_annotate_anndata_with_cell_type(sample_immune_data):

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
Expand Down Expand Up @@ -369,10 +369,10 @@ def test_annotate_anndata_with_cell_type(sample_immune_data):
def test_set_enricher_property_list(sample_immune_data):
enricher = AnndataEnricher(sample_immune_data)

assert enricher.enricher._Query__enrichment_property_list == ["rdfs:subClassOf"]
assert enricher.enricher._enrichment_property_list == ["rdfs:subClassOf"]

enricher.set_enricher_property_list(["rdfs:subClassOf", "BFO:0000050"])
assert enricher.enricher._Query__enrichment_property_list == ["rdfs:subClassOf", "BFO:0000050"]
assert enricher.enricher._enrichment_property_list == ["rdfs:subClassOf", "BFO:0000050"]


def test_validate_slim_list(mocker, sample_immune_data, slim_data):
Expand Down Expand Up @@ -442,9 +442,9 @@ def test_check_subclass_relationships(sample_immune_data):

exception = exc_info.value
expected_message = (
"Any of the following enrichment methods from AnndataEnricher must be used before using "
"enriched_rdf_graph method: contextual_slim_enrichment, full_slim_enrichment, "
"minimal_slim_enrichment, simple_enrichment"
"Any of the following enrichment methods from AnndataEnricher must be used first; "
"contextual_slim_enrichment, full_slim_enrichment, minimal_slim_enrichment, "
"simple_enrichment"
)

assert isinstance(exception, MissingEnrichmentProcess)
Expand Down
Loading