Skip to content

Commit

Permalink
Added remove_special_characters method (#63)
Browse files Browse the repository at this point in the history
* Updated version to 0.1.11

* Added remove_special_characters method
  • Loading branch information
ubyndr authored Mar 15, 2024
1 parent 20e02a2 commit 43b2f02
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 3 deletions.
5 changes: 3 additions & 2 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
colour_mapping,
find_and_rotate_center_layout,
generate_subgraph,
remove_special_characters,
select_node_with_property,
)
from pandasaurus_cxg.graph_generator.graph_predicates import (
Expand Down Expand Up @@ -129,7 +130,7 @@ def generate_rdf_graph(self):
for k, v in inner_dict.items():
if k == "subcluster_of":
continue
self.graph.add((resource, self.ns[k], Literal(v)))
self.graph.add((resource, self.ns[remove_special_characters(k)], Literal(v)))

# add relationship between each resource based on their predicate in the co_annotation_report
# subcluster = self.ns["subcluster_of"]
Expand All @@ -139,7 +140,7 @@ def generate_rdf_graph(self):
for _uuid, inner_dict in grouped_dict_uuid.items():
resource = self.ns[_uuid]
for ik, iv in inner_dict.get("subcluster_of", {}).items():
predicate = URIRef(self.ns + ik)
predicate = self.ns[remove_special_characters(ik)]
for s, _, _ in self.graph.triples((None, predicate, Literal(iv))):
self.graph.add((resource, subcluster, s))

Expand Down
6 changes: 6 additions & 0 deletions pandasaurus_cxg/graph_generator/graph_generator_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

import networkx as nx
from rdflib import OWL, RDF, RDFS, BNode, Graph, Literal, Namespace, URIRef

Expand Down Expand Up @@ -120,3 +122,7 @@ def select_node_with_property(graph: Graph, _property: str, value: str):
return [str(s) for s in graph.subjects(predicate=RDFS.label, object=Literal(value))]
else:
return [str(s) for s in graph.subjects(predicate=ns[_property], object=Literal(value))]


# Matches any single character that is not an ASCII letter, a digit, or an
# underscore. Compiled once at import time so repeated calls reuse it.
_DISALLOWED_CHARACTERS = re.compile(r"[^a-zA-Z0-9_]")


def remove_special_characters(input_string: str) -> str:
    """Return *input_string* reduced to ASCII letters, digits, and underscores.

    Each space character is first replaced with an underscore; every
    remaining character outside ``[a-zA-Z0-9_]`` is then removed. Whitespace
    other than the space character (tabs, newlines) is removed rather than
    replaced, and runs of spaces are not collapsed.

    Args:
        input_string: The text to sanitize.

    Returns:
        The sanitized string, which may be empty.
    """
    # Replace spaces before stripping; otherwise the pattern would delete them.
    underscored = input_string.replace(" ", "_")
    return _DISALLOWED_CHARACTERS.sub("", underscored)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandasaurus-cxg"
version = "0.1.10"
version = "0.1.11"
description = "Ontology enrichment tool for CxG standard AnnData files."
authors = ["Ugur Bayindir <ugur@ebi.ac.uk>"]
license = "http://www.apache.org/licenses/LICENSE-2.0"
Expand Down
16 changes: 16 additions & 0 deletions test/graph_generator/test_graph_generator_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import networkx as nx
import pytest
from rdflib import OWL, RDF, RDFS, BNode, Graph, Literal, Namespace, URIRef

from pandasaurus_cxg.graph_generator.graph_generator_utils import (
Expand All @@ -7,6 +8,7 @@
add_outgoing_edges_to_subgraph,
find_and_rotate_center_layout,
generate_subgraph,
remove_special_characters,
select_node_with_property,
)
from pandasaurus_cxg.graph_generator.graph_predicates import (
Expand Down Expand Up @@ -183,3 +185,17 @@ def test_select_node_with_property_predicate():

assert len(result) == 1
assert "http://example.org/subject2" in result


@pytest.mark.parametrize(
    "input_string, expected_output",
    [
        ("Hello World!", "Hello_World"),
        ("123abc$%^", "123abc"),
        ("!@#$%^&*()_", "_"),
        ("_This_is_a_test_", "_This_is_a_test_"),
        ("", ""),
        # Each space maps to its own underscore; runs are not collapsed.
        ("a  b", "a__b"),
        # Only the space character is replaced; other whitespace is removed.
        ("tab\tsep", "tabsep"),
    ],
)
def test_remove_special_characters(input_string, expected_output):
    """Check that spaces become underscores and other special characters are dropped."""
    assert remove_special_characters(input_string) == expected_output

0 comments on commit 43b2f02

Please sign in to comment.