Skip to content

Commit

Permalink
🎨 documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
GiulioRossetti committed May 19, 2024
1 parent 8035ccd commit d8a9a6c
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 62 deletions.
32 changes: 24 additions & 8 deletions cdlib/lifecycles/algorithms/event_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,30 @@ def event_weights_from_flow(analyzed_flows: dict, direction: str) -> dict:

def _compute_event_scores(analyzed_flow: dict) -> list:
return [
(analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * analyzed_flow["Outflow"],
(analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * analyzed_flow["Outflow"],
(analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * (1 - analyzed_flow["Outflow"]),
(analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * (1 - analyzed_flow["Outflow"]),
(analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* analyzed_flow["Outflow"],
(analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* analyzed_flow["Outflow"],
(1 - analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* analyzed_flow["Outflow"],
(analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"])
* analyzed_flow["Identity"]
* (1 - analyzed_flow["Outflow"]),
(analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* (1 - analyzed_flow["Outflow"]),
(1 - analyzed_flow["Unicity"])
* (1 - analyzed_flow["Identity"])
* (1 - analyzed_flow["Outflow"]),
]


Expand Down
40 changes: 0 additions & 40 deletions cdlib/lifecycles/algorithms/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,46 +75,6 @@ def _max_second_difference(labels):
return max_val - second_largest


def _berger_parker_index(labels):
"""
Dominance index, the probability of the most frequent attribute value in the set
Args:
labels (_type_): _description_
Returns:
_type_: _description_
"""
n = len(labels)
counter = Counter(labels)
probabilities = [count / n for count in counter.values()]
max_val = np.max(probabilities)
return max_val


def _gini_index(labels):

n = len(labels)
counter = Counter(labels)
probabilities = [count / n for count in counter.values()]

array = np.array(probabilities)
"""Calculate the Gini coefficient of a numpy array."""
# based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
# from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

array = array.flatten() # all values are treated equally, arrays must be 1d
if np.amin(array) < 0:
array -= np.amin(array) # values cannot be negative
array += 0.0000001 # values cannot be 0
array = np.sort(array) # values must be sorted
index = np.arange(1, array.shape[0] + 1) # index per array element
n = array.shape[0] # number of array elements
return (np.sum((2 * index - n - 1) * array)) / (
n * np.sum(array)
) # Gini coefficient


def facet_unicity(labels: list) -> float:
"""
the unicity facet quantifies the extent to which a target set comes from one (=1) or multiple (->0) flows.
Expand Down
10 changes: 0 additions & 10 deletions cdlib/lifecycles/classes/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,6 @@ def __add_partition(self, partition: list) -> None:
tmp = set()
tmp.add(group)
self.named_sets[name] = tmp

elif self.dtype == dict:
for elem in group:
to_str = json.dumps(elem)
self.named_sets[name].add(to_str)

elif self.dtype == list:
for elem in group:
to_str = str(elem)
self.named_sets[name].add(to_str)
else:
raise NotImplementedError("dtype not supported")

Expand Down
47 changes: 45 additions & 2 deletions cdlib/test/test_events.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import unittest
import cdlib
from cdlib import algorithms
from cdlib import LifeCycle
from cdlib import TemporalClustering
from cdlib.lifecycles.algorithms.event_analysis import (
facets,
event_weights,
event as evn,
)
from plotly import graph_objects as go
import networkx as nx
from networkx.generators.community import LFR_benchmark_graph
Expand Down Expand Up @@ -79,7 +85,9 @@ def test_custom_matching(self):
c = events.analyze_flows("+")
self.assertIsInstance(c, dict)

events.compute_events_with_custom_matching(jaccard, two_sided=False, threshold=0)
events.compute_events_with_custom_matching(
jaccard, two_sided=False, threshold=0
)
c = events.analyze_flows("+")
self.assertIsInstance(c, dict)

Expand Down Expand Up @@ -205,7 +213,7 @@ def random_attributes():
attrs = events.get_attribute("fakeattribute")
self.assertIsInstance(attrs, dict)

events.analyze_flow("1_1", "+", attr="fakeattribute")
events.analyze_flow("1_1", "+", attr="fakeattribute")
self.assertIsInstance(attrs, dict)

ev = events.get_event("1_1")
Expand All @@ -218,8 +226,43 @@ def random_attributes():
a = ev.to_event # to get the to events of the community 1_2
self.assertIsInstance(a, dict)

def test_marginal(self):
    """Smoke-test rarely used LifeCycle / CommunityMatching helpers.

    Builds a 10-step temporal clustering over LFR benchmark graphs,
    computes "facets" events, then invokes the marginal helper methods.
    Only a few return values are type-checked; the rest are called purely
    for coverage.
    """
    tc = TemporalClustering()
    for t in range(0, 10):
        # NOTE(review): seed=10 makes every snapshot the same graph;
        # presumably only the pipeline, not temporal variety, is under test
        g = LFR_benchmark_graph(
            n=250,
            tau1=3,
            tau2=1.5,
            mu=0.1,
            average_degree=5,
            min_community=20,
            seed=10,
        )
        coms = algorithms.louvain(g)  # here any CDlib algorithm can be applied
        tc.add_clustering(coms, t)

    events = LifeCycle(tc)
    events.compute_events("facets")

    # marginal tests (not all methods are tested since they are not of use in cdlib -
    # they are invoked for completeness)
    self.assertIsInstance(
        events.cm.slice(0, 5), cdlib.lifecycles.classes.matching.CommunityMatching
    )
    self.assertIsInstance(events.cm.universe_set(), set)
    self.assertIsInstance(list(events.cm.group_iterator()), list)
    self.assertIsInstance(list(events.cm.group_iterator(3)), list)
    # the calls below are exercised without validating their results
    events.cm.filter_on_group_size(1, 100)
    events.cm.get_element_membership(1)
    events.cm.get_all_element_memberships()
    events.get_events()
    events.get_event_types()
    ev = events.get_event("1_1")  # "1_1" = community 1 of snapshot 1
    ev.get_from_event()
    ev.get_to_event()
    facets((events.cm), "0_2", "+")
    event_weights(events.cm, "0_2", "+")
    evn(events.cm, "0_2", "+")


if __name__ == "__main__":
Expand Down
2 changes: 0 additions & 2 deletions cdlib/test/test_nodeclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,3 @@ def test_comparison(self):
self.assertIsInstance(coms.geometric_accuracy(coms2).score, float)
self.assertIsInstance(coms.overlap_quality(coms2).score, float)
self.assertIsInstance(coms.sample_expected_sim(coms2).score, float)


0 comments on commit d8a9a6c

Please sign in to comment.