Skip to content

Commit

Permalink
Provide term names for contextual enrichment terms (#45)
Browse files Browse the repository at this point in the history
* Format changes

* Added pygraphviz

* Refactored context_list usage and added get_context_list method

* Version has been updated to 0.1.3
  • Loading branch information
Ismail Ugur Bayindir authored Sep 18, 2023
1 parent b03690b commit 2167c32
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 15 deletions.
39 changes: 27 additions & 12 deletions pandasaurus_cxg/anndata_enricher.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import itertools
from typing import List, Optional
from typing import List, Optional, Tuple

import pandas as pd
from anndata import AnnData
Expand All @@ -23,6 +23,7 @@ def __init__(
anndata: AnnData,
cell_type_field: Optional[str] = "cell_type_ontology_term_id",
context_field: Optional[str] = "tissue_ontology_term_id",
context_field_label: Optional[str] = "tissue",
ontology_list_for_slims: Optional[List[str]] = None,
):
"""Initialize the AnndataEnricher instance with AnnData object.
Expand All @@ -42,12 +43,13 @@ def __init__(
ontology_list_for_slims = ["Cell Ontology"]
# TODO Do we need to keep whole anndata? Would it be enough to keep the obs only?
self._anndata = anndata
self.__seed_list = self._anndata.obs[cell_type_field].unique().tolist()
self.enricher = Query(self.__seed_list)
self.__context_list = (
self._seed_list = self._anndata.obs[cell_type_field].unique().tolist()
self.enricher = Query(self._seed_list)
unique_context = self._anndata.obs[[context_field, context_field_label]].drop_duplicates()
self._context_list = (
None
if context_field not in self._anndata.obs.keys()
else self._anndata.obs[context_field].unique().tolist()
else dict(zip(unique_context[context_field], unique_context[context_field_label]))
)
self.slim_list = [
slim
Expand Down Expand Up @@ -125,8 +127,8 @@ def contextual_slim_enrichment(self) -> Optional[pd.DataFrame]:
"""
# TODO Better handle datasets without tissue field
return (
self.enricher.contextual_slim_enrichment(self.__context_list)
if self.__context_list
self.enricher.contextual_slim_enrichment(list(self._context_list.keys()))
if self._context_list
else None
)

Expand Down Expand Up @@ -211,7 +213,7 @@ def set_enricher_property_list(self, property_list: List[str]):
Args:
property_list (List[str]): The list of properties to include in the enrichment analysis.
"""
self.enricher = Query(self.__seed_list, property_list)
self.enricher = Query(self._seed_list, property_list)

def validate_slim_list(self, slim_list):
"""Check if any slim term in the given list is invalid.
Expand All @@ -230,11 +232,14 @@ def validate_slim_list(self, slim_list):
raise InvalidSlimName(invalid_slim_list, self.slim_list)

def get_seed_list(self):
return self.__seed_list
return self._seed_list

def get_anndata(self):
    """Return the AnnData object stored on this instance.

    Returns:
        The object held in ``self._anndata``, which ``__init__`` assigns
        from its ``anndata`` argument.
    """
    return self._anndata

def get_context_list(self):
    """Return the context terms stored on this instance.

    Despite the name, the stored value is not a list. ``__init__`` assigns
    ``self._context_list`` either ``None`` (when ``context_field`` is not
    among the ``obs`` keys) or a dict that maps each ``context_field``
    value to the ``context_field_label`` value from the same de-duplicated
    ``obs`` row.

    Returns:
        The value held in ``self._context_list``.
    """
    return self._context_list

def create_cell_type_dict(self):
# TODO Add empty dataframe exception
return (
Expand All @@ -253,7 +258,17 @@ def create_cell_type_dict(self):
.to_dict()
)

def check_subclass_relationships(self, cell_type_list: List[str]):
def check_subclass_relationships(self, cell_type_list: List[str]) -> List[Tuple[str, str]]:
"""
Check for subclass relationships between cell type ontology terms.
Args:
cell_type_list: A list of cell type ontology term IDs to be used
for cell type annotation.
Returns:
A list of cell type pairs that have a subClassOf relationship between them.
"""
# TODO Add empty dataframe exception
subclass_relation = []
for s, o in itertools.combinations(cell_type_list, 2):
Expand All @@ -262,12 +277,12 @@ def check_subclass_relationships(self, cell_type_list: List[str]):
& (self.enricher.enriched_df["p"] == "rdfs:subClassOf")
& (self.enricher.enriched_df["o"] == o)
].empty:
subclass_relation.append([s, o])
subclass_relation.append((s, o))

if not self.enricher.enriched_df[
(self.enricher.enriched_df["s"] == o)
& (self.enricher.enriched_df["p"] == "rdfs:subClassOf")
& (self.enricher.enriched_df["o"] == s)
].empty:
subclass_relation.append([o, s])
subclass_relation.append((o, s))
return subclass_relation
4 changes: 2 additions & 2 deletions pandasaurus_cxg/utils/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import List, Tuple


class InvalidSlimName(Exception):
Expand Down Expand Up @@ -40,7 +40,7 @@ def __init__(self, enrichment_methods: List[str]):


class SubclassWarning(Exception):
def __init__(self, relation: List[List[str]]):
def __init__(self, relation: List[Tuple[str, str]]):
joined_relations = ", ".join(["-".join(rel) for rel in relation])
self.message = (
f"The following cell type terms are related with subClassOf relation. "
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandasaurus-cxg"
version = "0.1.2"
version = "0.1.3"
description = "Ontology enrichment tool for CxG standard AnnData files."
authors = ["Ugur Bayindir <ugur@ebi.ac.uk>"]
license = "http://www.apache.org/licenses/LICENSE-2.0"
Expand Down

0 comments on commit 2167c32

Please sign in to comment.