Skip to content

Commit

Permalink
🎨 documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
GiulioRossetti committed May 19, 2024
1 parent 8035ccd commit d8a9a6c
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 62 deletions.
32 changes: 24 additions & 8 deletions cdlib/lifecycles/algorithms/event_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,30 @@ def event_weights_from_flow(analyzed_flows: dict, direction: str) -> dict:

def _compute_event_scores(analyzed_flow: dict) -> list:
return [
(analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * analyzed_flow["Outflow"],
(analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * analyzed_flow["Outflow"],
(analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * (1 - analyzed_flow["Outflow"]),
(analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * (1 - analyzed_flow["Outflow"]),
(analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* analyzed_flow["Outflow"],
(analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* analyzed_flow["Outflow"],
(analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* (1 - analyzed_flow["Outflow"]),
(analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* (1 - analyzed_flow["Outflow"]),
]


Expand Down
40 changes: 0 additions & 40 deletions cdlib/lifecycles/algorithms/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,46 +75,6 @@ def _max_second_difference(labels):
return max_val - second_largest


def _berger_parker_index(labels):
"""
Dominance index, the probability of the most frequent attribute value in the set
Args:
labels (_type_): _description_
Returns:
_type_: _description_
"""
n = len(labels)
counter = Counter(labels)
probabilities = [count / n for count in counter.values()]
max_val = np.max(probabilities)
return max_val


def _gini_index(labels):

n = len(labels)
counter = Counter(labels)
probabilities = [count / n for count in counter.values()]

array = np.array(probabilities)
"""Calculate the Gini coefficient of a numpy array."""
# based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
# from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

array = array.flatten() # all values are treated equally, arrays must be 1d
if np.amin(array) < 0:
array -= np.amin(array) # values cannot be negative
array += 0.0000001 # values cannot be 0
array = np.sort(array) # values must be sorted
index = np.arange(1, array.shape[0] + 1) # index per array element
n = array.shape[0] # number of array elements
return (np.sum((2 * index - n - 1) * array)) / (
n * np.sum(array)
) # Gini coefficient


def facet_unicity(labels: list) -> float:
"""
the unicity facet quantifies the extent to which a target set comes from one (=1) or multiple (->0) flows.
Expand Down
10 changes: 0 additions & 10 deletions cdlib/lifecycles/classes/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,6 @@ def __add_partition(self, partition: list) -> None:
tmp = set()
tmp.add(group)
self.named_sets[name] = tmp

elif self.dtype == dict:
for elem in group:
to_str = json.dumps(elem)
self.named_sets[name].add(to_str)

elif self.dtype == list:
for elem in group:
to_str = str(elem)
self.named_sets[name].add(to_str)
else:
raise NotImplementedError("dtype not supported")

Expand Down
47 changes: 45 additions & 2 deletions cdlib/test/test_events.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import unittest
import cdlib
from cdlib import algorithms
from cdlib import LifeCycle
from cdlib import TemporalClustering
from cdlib.lifecycles.algorithms.event_analysis import (
facets,
event_weights,
event as evn,
)
from plotly import graph_objects as go
import networkx as nx
from networkx.generators.community import LFR_benchmark_graph
Expand Down Expand Up @@ -79,7 +85,9 @@ def test_custom_matching(self):
c = events.analyze_flows("+")
self.assertIsInstance(c, dict)

events.compute_events_with_custom_matching(jaccard, two_sided=False, threshold=0)
events.compute_events_with_custom_matching(
jaccard, two_sided=False, threshold=0
)
c = events.analyze_flows("+")
self.assertIsInstance(c, dict)

Expand Down Expand Up @@ -205,7 +213,7 @@ def random_attributes():
attrs = events.get_attribute("fakeattribute")
self.assertIsInstance(attrs, dict)

events.analyze_flow("1_1", "+", attr="fakeattribute")
events.analyze_flow("1_1", "+", attr="fakeattribute")
self.assertIsInstance(attrs, dict)

ev = events.get_event("1_1")
Expand All @@ -218,8 +226,43 @@ def random_attributes():
a = ev.to_event # to get the to events of the community 1_2
self.assertIsInstance(a, dict)

def test_marginal(self):
    """Smoke-test rarely used LifeCycle / CommunityMatching helpers.

    Builds a 10-step temporal clustering over LFR benchmark graphs,
    computes "facets" events, then invokes the marginal helper methods.
    Only a few return values are type-checked; the rest are called purely
    for coverage.
    """
    tc = TemporalClustering()
    for t in range(0, 10):
        # NOTE(review): seed=10 makes every snapshot the same graph;
        # presumably only the pipeline, not temporal variety, is under test
        g = LFR_benchmark_graph(
            n=250,
            tau1=3,
            tau2=1.5,
            mu=0.1,
            average_degree=5,
            min_community=20,
            seed=10,
        )
        coms = algorithms.louvain(g)  # here any CDlib algorithm can be applied
        tc.add_clustering(coms, t)

    events = LifeCycle(tc)
    events.compute_events("facets")

    # marginal tests (not all methods are tested since they are not of use in cdlib -
    # they are invoked for completeness)
    self.assertIsInstance(
        events.cm.slice(0, 5), cdlib.lifecycles.classes.matching.CommunityMatching
    )
    self.assertIsInstance(events.cm.universe_set(), set)
    self.assertIsInstance(list(events.cm.group_iterator()), list)
    self.assertIsInstance(list(events.cm.group_iterator(3)), list)
    # the calls below are exercised without validating their results
    events.cm.filter_on_group_size(1, 100)
    events.cm.get_element_membership(1)
    events.cm.get_all_element_memberships()
    events.get_events()
    events.get_event_types()
    ev = events.get_event("1_1")  # "1_1" = community 1 of snapshot 1
    ev.get_from_event()
    ev.get_to_event()
    facets((events.cm), "0_2", "+")
    event_weights(events.cm, "0_2", "+")
    evn(events.cm, "0_2", "+")


if __name__ == "__main__":
Expand Down
2 changes: 0 additions & 2 deletions cdlib/test/test_nodeclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,3 @@ def test_comparison(self):
self.assertIsInstance(coms.geometric_accuracy(coms2).score, float)
self.assertIsInstance(coms.overlap_quality(coms2).score, float)
self.assertIsInstance(coms.sample_expected_sim(coms2).score, float)


0 comments on commit d8a9a6c

Please sign in to comment.