Skip to content

Commit

Permalink
Always transitively merge (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
dobraczka authored Mar 20, 2024
1 parent 4429e0f commit 8cbc026
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
18 changes: 9 additions & 9 deletions src/eche/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,8 @@ def _contains_overlaps(self, data):
return False

def _from_sets(self, data: Iterable[Set]):
# check if contains overlaps
if self._contains_overlaps(data):
# merge overlapping
data = connected_components(data)
# merge overlapping
data = connected_components(data)

for cluster_id, inner in enumerate(data):
if not isinstance(inner, set):
Expand Down Expand Up @@ -158,13 +156,15 @@ def __init__(
return
if not isinstance(data, (dict, list)):
raise TypeError(f"Only list or dict allowed, but got {type(data)}")
if isinstance(data, list):
self._from_sets(data)
elif isinstance(data, dict) and len(data) != 0:
if isinstance(data, dict) and len(data) != 0:
if isinstance(next(iter(data.values())), set):
self._from_clusters(data)
else:
self._from_dict(data)
return
# assume binary links as key-value pairs
# transform to iterable sets of 2
data = (set(pair) for pair in data.items())
if isinstance(data, Iterable):
self._from_sets(data)

# Debug representation: renders the instance as
# "ClusterHelper(elements=...,clusters=...)", formatting both attributes
# with str() (the `!s` conversion) rather than repr().
def __repr__(self):
return f"ClusterHelper(elements={self.elements!s},clusters={self.clusters!s})"
Expand Down
15 changes: 11 additions & 4 deletions tests/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,6 @@ def test_clusters_init():
}
assert {frozenset(c) for c in ch_sets.clusters.values()} == expected_clusters

# assert no selflinks
with pytest.raises(ValueError, match="selflinks"):
ClusterHelper({"1": "1"})

# assert no multiple cluster memberships with cluster init
with pytest.raises(ValueError, match="multiple membership"):
ClusterHelper({0: {"1", "2"}, 1: {"1", "3"}})
Expand Down Expand Up @@ -492,3 +488,14 @@ def test_from_zipped_file(
assert expected_prefixed_pairs == set(
ch.pairs_in_ds_tuple(ds_tuple=_LEFT_RIGHT_NAMES)
)


def test_transitivity_for_all_inits():
    """Every supported constructor input must yield the same merged cluster.

    Each entry below describes the links a-b and b-c in a different input
    format; in all cases the resulting ``clusters`` mapping is expected to
    be the single transitive cluster ``{"a", "b", "c"}`` under id ``0``.
    """
    gold = {0: {"a", "b", "c"}}
    equivalent_inputs = [
        # already-clustered dict (cluster id -> member set)
        gold,
        # binary links given as key-value pairs
        {"a": "b", "b": "c"},
        # overlapping sets that have to be merged
        [{"a", "b"}, {"b", "c"}],
        # a single, already complete set
        [{"a", "b", "c"}],
        # a self-link ("a" -> "a") must not change the outcome
        {"a": "a", "b": "a", "c": "b"},
    ]
    for data in equivalent_inputs:
        assert ClusterHelper(data).clusters == gold

0 comments on commit 8cbc026

Please sign in to comment.