diff --git a/cdlib/__init__.py b/cdlib/__init__.py
index 3533b526..f1ef144b 100644
--- a/cdlib/__init__.py
+++ b/cdlib/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.3.1'
+__version__ = "0.4.0"
from cdlib.classes.node_clustering import NodeClustering
from cdlib.classes.edge_clustering import EdgeClustering
from cdlib.classes.fuzzy_node_clustering import FuzzyNodeClustering
@@ -6,3 +6,4 @@
from cdlib.classes.bipartite_node_clustering import BiNodeClustering
from cdlib.classes.temporal_clustering import TemporalClustering
from cdlib.classes.named_clustering import NamedClustering
+from cdlib.lifecycles import LifeCycle, CommunityEvent
diff --git a/cdlib/algorithms/crisp_partition.py b/cdlib/algorithms/crisp_partition.py
index d87fde73..19314416 100644
--- a/cdlib/algorithms/crisp_partition.py
+++ b/cdlib/algorithms/crisp_partition.py
@@ -517,13 +517,12 @@ def louvain(
========== ======== ========
:param g_original: a networkx/igraph object
- :param partition : NodeClustering object, optional the algorithm will start using this partition of the nodes.
- :param weight: str, optional the key in graph to use as weight. Default to 'weight'
+    :param partition: NodeClustering object, optional. The algorithm will start using this partition of the nodes.
+    :param weight: str, optional. The key in graph to use as weight. Defaults to "weight".
:param resolution: double, optional Will change the size of the communities, default to 1.
- :param randomize: int, RandomState instance or None, optional (default=None). If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`.
+ :param randomize: int, RandomState instance or None, optional (default=None).
:return: NodeClustering object
-
:Example:
>>> from cdlib import algorithms
@@ -536,6 +535,7 @@ def louvain(
Blondel, Vincent D., et al. `Fast unfolding of communities in large networks. `_ Journal of statistical mechanics: theory and experiment 2008.10 (2008): P10008.
.. note:: Reference implementation: https://github.com/taynaud/python-louvain
+
"""
g = convert_graph_formats(g_original, nx.Graph)
@@ -2689,9 +2689,21 @@ def paris(g_original: object) -> NodeClustering:
.. note:: Reference implementation: https://github.com/tbonald/paris
"""
+
g = convert_graph_formats(g_original, nx.Graph)
- D = paris_alg(g)
- clustering = paris_best_clustering(D)
+
+    # paris expects integer node labels: relabel a copy, cluster, then map back
+    dmap = {n: i for i, n in enumerate(g.nodes)}
+    reverse_map = {i: n for n, i in dmap.items()}
+    g = nx.relabel_nodes(g, dmap)  # copy=True by default, so the caller's graph is untouched
+
+    D = paris_alg(g)
+    coms = paris_best_clustering(D)
+
+    clustering = []
+
+    for com in coms:
+        clustering.append([reverse_map[c] for c in com])
return NodeClustering(
clustering, g_original, "Paris", method_parameters={}, overlap=False
diff --git a/cdlib/algorithms/temporal_partition.py b/cdlib/algorithms/temporal_partition.py
index a724264c..0bdf3ec2 100644
--- a/cdlib/algorithms/temporal_partition.py
+++ b/cdlib/algorithms/temporal_partition.py
@@ -1,5 +1,6 @@
from cdlib import TemporalClustering, NamedClustering
from cdlib.algorithms.internal_dcd.eTILES import eTILES
+import networkx as nx
__all__ = ["tiles"]
@@ -34,7 +35,7 @@ def tiles(dg: object, obs: int = 1) -> TemporalClustering:
:References:
- Rossetti, Giulio; Pappalardo, Luca; Pedreschi, Dino, and Giannotti, Fosca. `Tiles: an online algorithm for community discovery in dynamic social networks.`_ Machine Learning (2016), 106(8), 1213-1241.
+ Rossetti, Giulio; Pappalardo, Luca; Pedreschi, Dino, and Giannotti, Fosca. Tiles: an online algorithm for community discovery in dynamic social networks. Machine Learning (2016), 106(8), 1213-1241.
"""
alg = eTILES(dg=dg, obs=obs)
tc = TemporalClustering()
@@ -57,8 +58,10 @@ def tiles(dg: object, obs: int = 1) -> TemporalClustering:
mtc = alg.get_matches()
tc.add_matching(mtc)
+    # reconstruct the community lifecycle polytree
+
# cleaning & updating community matching
- dg = tc.lifecycle_polytree(None, False)
+ dg = __lifecycle_polytree(tc)
community_ids = list(dg.nodes())
tids = tc.get_observation_ids()
@@ -77,3 +80,22 @@ def tiles(dg: object, obs: int = 1) -> TemporalClustering:
tc.add_matching(mtc)
return tc
+
+
+def __lifecycle_polytree(tc) -> nx.DiGraph:
+ """
+    Reconstruct the polytree representing community lifecycles from the matching stored in the TemporalClustering object.
+ """
+
+ lifecycle = tc.matching
+
+ pt = nx.DiGraph()
+ if len(lifecycle[0]) == 3:
+ for u, v, w in lifecycle:
+ pt.add_edge(u, v, weight=w)
+ else:
+ # implicit matching
+ for u, v in lifecycle:
+ pt.add_edge(u, v)
+
+ return pt
diff --git a/cdlib/classes/node_clustering.py b/cdlib/classes/node_clustering.py
index fd454a7d..badf25c7 100644
--- a/cdlib/classes/node_clustering.py
+++ b/cdlib/classes/node_clustering.py
@@ -704,9 +704,9 @@ def normalized_mutual_information(
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.normalized_mutual_information(leiden_communities)
@@ -728,9 +728,9 @@ def overlapping_normalized_mutual_information_LFK(
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.overlapping_normalized_mutual_information_LFK(leiden_communities)
@@ -782,9 +782,9 @@ def omega(self, clustering: Clustering) -> evaluation.MatchingResult:
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.omega(leiden_communities)
@@ -805,9 +805,9 @@ def f1(self, clustering: Clustering) -> evaluation.MatchingResult:
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.f1(leiden_communities)
@@ -828,9 +828,9 @@ def nf1(self, clustering: Clustering) -> evaluation.MatchingResult:
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.nf1(leiden_communities)
@@ -871,9 +871,9 @@ def adjusted_rand_index(self, clustering: Clustering) -> evaluation.MatchingResu
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.adjusted_rand_index(leiden_communities)
@@ -915,9 +915,9 @@ def adjusted_mutual_information(
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.adjusted_mutual_information(leiden_communities)
@@ -942,9 +942,9 @@ def variation_of_information(
:Example:
- >>> from cdlib.algorithms import louvain
+ >>> from cdlib import algorithms
>>> g = nx.karate_club_graph()
- >>> communities = louvain(g)
+ >>> communities = algorithms.louvain(g)
>>> leiden_communities = algorithms.leiden(g)
>>> mod = communities.variation_of_information(leiden_communities)
@@ -954,3 +954,522 @@ def variation_of_information(
1. Meila, M. (2007). **Comparing clusterings - an information based distance.** Journal of Multivariate Analysis, 98, 873-895. doi:10.1016/j.jmva.2006.11.013
"""
return evaluation.variation_of_information(self, clustering)
+
+ def partition_closeness_simple(
+ self, clustering: Clustering
+ ) -> evaluation.MatchingResult:
+ """Community size density closeness.
+ Simple implementation that does not leverage kernel density estimator.
+
+        .. math:: S_G(A,B) = \\frac{1}{2} \\sum_{i=1}^{r}\\sum_{j=1}^{s} \\min\\left(\\frac{n^a_i(x^a_i)}{N^a}, \\frac{n^b_j(x^b_j)}{N^b}\\right) \\delta(x_i^a, x_j^b)
+
+        where:
+
+        - :math:`N^a` is the total number of communities in A (of any size);
+        - :math:`x^a` is the ordered list of community sizes for A;
+        - :math:`n^a` is the multiplicity of community sizes for A;
+
+        and symmetrically for B.
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.partition_closeness_simple(leiden_communities)
+
+ :Reference:
+
+ 1. Dao, Vinh-Loc, Cécile Bothorel, and Philippe Lenca. "Estimating the similarity of community detection methods based on cluster size distribution." International Conference on Complex Networks and their Applications. Springer, Cham, 2018.
+ """
+ return evaluation.partition_closeness_simple(self, clustering)
+
+ def ecs(
+ self,
+ clustering: object,
+ alpha: float = 0.9,
+ r: float = 1.0,
+ r2: float = None,
+ rescale_path_type: str = "max",
+ ppr_implementation: str = "prpack",
+ ) -> evaluation.MatchingResult:
+ """
+ The element-centric clustering similarity.
+
+ :param clustering: NodeClustering object
+ :param alpha: The personalized page-rank return probability as a float in [0,1]. float, default 0.9
+ :param r: The hierarchical scaling parameter for clustering1. float, default 1.0
+ :param r2: The hierarchical scaling parameter for clustering2. float, default None
+        :param rescale_path_type: rescale the hierarchical height by: 'max' the maximum path from the root; 'min' the minimum path from the root; 'linkage' use the linkage distances in the clustering.
+ :param ppr_implementation: Choose an implementation for personalized page-rank calculation: 'prpack' use PPR algorithms in igraph; 'power_iteration': use power_iteration method.
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.ecs(leiden_communities)
+
+ :Reference:
+
+ A.J. Gates, I.B. Wood, W.P. Hetrick, and YY Ahn [2019]. "Element-centric clustering comparison unifies overlaps and hierarchy". Scientific Reports 9, 8574
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.ecs(
+ self,
+ clustering,
+ alpha=alpha,
+ r=r,
+ r2=r2,
+ rescale_path_type=rescale_path_type,
+ ppr_implementation=ppr_implementation,
+ )
+
+ def jaccard_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the Jaccard index between two clusterings.
+
+ .. math:: J = \\frac{N11}{(N11+N10+N01)}
+
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.jaccard_index(leiden_communities)
+
+ :Reference:
+
+ Paul Jaccard. The distribution of the flora in the alpine zone. New Phytologist, 11(2):37–50, 1912.
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.jaccard_index(self, clustering)
+
+ def rand_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the Rand index between two clusterings.
+
+ .. math:: RI = \\frac{(N11 + N00)}{(N11 + N10 + N01 + N00)}
+
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.rand_index(leiden_communities)
+
+ :Reference:
+
+ William M Rand. Objective Criteria for the Evaluation of Clustering Methods. Journal of the American Statistical Association, 66(336):846, 1971.
+
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.rand_index(self, clustering)
+
+ def fowlkes_mallows_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the Fowlkes and Mallows index between two clusterings
+
+ .. math:: FM = \\frac{N11}{ \sqrt{ (N11 + N10) * (N11 + N01) }}
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.fowlkes_mallows_index(leiden_communities)
+
+ :Reference:
+
+ Edward B. Fowlkes and Colin L. Mallows. A method for comparing two hierarchical clusterings. Journal of the American Statistical Association, 78(383):553–569, 1983.
+
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.fowlkes_mallows_index(self, clustering)
+
+ def classification_error(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+        This function calculates the classification error between two clusterings.
+
+ .. math:: CE = 1 - PI
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.classification_error(leiden_communities)
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.classification_error(self, clustering)
+
+ def czekanowski_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+
+        This function calculates the Czekanowski index between two clusterings.
+
+ Also known as:
+ Dice Symmetric index
+ Sorensen index
+
+ .. math:: F = \\frac{2*N11}{(2*N11 + N10 + N01)}
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.czekanowski_index(leiden_communities)
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.czekanowski_index(self, clustering)
+
+ def dice_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+        This function calculates the Dice index between two clusterings.
+
+ Also known as:
+ Czekanowski index
+ Sorensen index
+
+ .. math:: F = \\frac{2*N11}{(2*N11 + N10 + N01)}
+
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.dice_index(leiden_communities)
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+
+ return evaluation.dice_index(self, clustering)
+
+ def sorensen_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+        This function calculates the Sorensen index between two clusterings.
+
+ Also known as:
+ Czekanowski index
+ Dice index
+
+ .. math:: F = \\frac{2*N11}{(2*N11 + N10 + N01)}
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.sorensen_index(leiden_communities)
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+
+ """
+
+ return evaluation.sorensen_index(self, clustering)
+
+ def rogers_tanimoto_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the Rogers and Tanimoto index between two clusterings.
+
+ .. math:: RT = \\frac{(N11 + N00)}{(N11 + 2*(N10+N01) + N00)}
+
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.rogers_tanimoto_index(leiden_communities)
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.rogers_tanimoto_index(self, clustering)
+
+ def southwood_index(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the Southwood index between two clusterings.
+
+ .. math:: \\frac{N11}{(N10 + N01)}
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.southwood_index(leiden_communities)
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.southwood_index(self, clustering)
+
+ def mi(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the Mutual Information (MI) between two clusterings.
+
+ .. math:: MI = (S(c1) + S(c2) - S(c1, c2))
+
+        where S(c1) is the Shannon entropy of the clustering size distribution and S(c1, c2) is the Shannon entropy of the joint clustering size distribution.
+
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.mi(leiden_communities)
+
+ :Reference:
+
+        Leon Danon, Albert Díaz-Guilera, Jordi Duch, and Alex Arenas. Comparing community structure identification. Journal of Statistical Mechanics: Theory and Experiment, 2005(09):P09008–P09008, September 2005.
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.mi(self, clustering)
+
+ def rmi(
+ self,
+ clustering: object,
+ norm_type: str = "none",
+ logbase: int = 2,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the Reduced Mutual Information (RMI) between two clusterings.
+
+        .. math:: RMI = MI(c1, c2) - \\frac{\\log \\Omega(a, b)}{n}
+
+ where MI(c1, c2) is mutual information of the clusterings c1 and c2, and Omega(a, b) is the number of contingency tables with row and column sums equal to a and b.
+
+ :param clustering: NodeClustering object
+ :param norm_type: The normalization types are: 'none' returns the RMI without a normalization; 'normalized' returns the RMI with upper bound equals to 1.
+ :param logbase: int, default 2
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.rmi(leiden_communities)
+
+ :Reference:
+
+ M. E. J. Newman, George T. Cantwell, and Jean-Gabriel Young. Improved mutual information measure for classification and community detection. arXiv:1907.12581, 2019.
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.rmi(self, clustering, norm_type=norm_type, logbase=logbase)
+
+ def geometric_accuracy(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the geometric accuracy between two (overlapping) clusterings.
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.geometric_accuracy(leiden_communities)
+
+ :Reference:
+
+ Tamás Nepusz, Haiyuan Yu, and Alberto Paccanaro. Detecting overlapping protein complexes in protein-protein interaction networks. Nature Methods, 9(5):471–472, 2012.
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.geometric_accuracy(self, clustering)
+
+ def overlap_quality(
+ self,
+ clustering: object,
+ ) -> evaluation.MatchingResult:
+ """
+ This function calculates the overlap quality between two (overlapping) clusterings.
+
+ :param clustering: NodeClustering object
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.overlap_quality(leiden_communities)
+
+ :Reference:
+
+ Yong-Yeol Ahn, James P Bagrow, and Sune Lehmann. Link communities reveal multiscale complexity in networks. Nature, 466(7307):761–764, June 2010.
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.overlap_quality(self, clustering)
+
+ def sample_expected_sim(
+ self,
+ clustering: object,
+ measure: str = "jaccard_index",
+ random_model: str = "perm",
+ n_samples: int = 1,
+ keep_samples: bool = False,
+ ) -> evaluation.MatchingResult:
+ """
+        This function calculates the expected similarity for all pairwise comparisons between clusterings drawn from one of six random models.
+
+ .. note:: Clustering 2 is considered the gold-standard clustering for one-sided expectations
+
+
+ :param clustering: NodeClustering object
+ :param measure: The similarity measure to evaluate. Must be one of [ecs, jaccard_index, rand_index, fowlkes_mallows_index, classification_error, czekanowski_index, dice_index, sorensen_index, rogers_tanimoto_index, southwood_index, mi, rmi, vi, geometric_accuracy, overlap_quality, sample_expected_sim]
+ :param random_model: The random model to use:
+
+ 'all' : uniform distribution over the set of all clusterings of
+ n_elements
+
+ 'all1' : one-sided selection from the uniform distribution over the set
+ of all clusterings of n_elements
+
+ 'num' : uniform distribution over the set of all clusterings of
+ n_elements in n_clusters
+
+ 'num1' : one-sided selection from the uniform distribution over the set
+ of all clusterings of n_elements in n_clusters
+
+ 'perm' : the permutation model for a fixed cluster size sequence
+
+ 'perm1' : one-sided selection from the permutation model for a fixed
+ cluster size sequence, same as 'perm'
+
+ :param n_samples: The number of random Clusterings sampled to determine the expected similarity.
+ :param keep_samples: If True, returns the Similarity samples themselves, otherwise return their mean.
+ :return: MatchingResult object
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> import networkx as nx
+ >>> g = nx.karate_club_graph()
+ >>> louvain_communities = algorithms.louvain(g)
+ >>> leiden_communities = algorithms.leiden(g)
+ >>> louvain_communities.sample_expected_sim(leiden_communities)
+
+ .. note:: The function requires the clusim library to be installed. You can install it via pip: pip install clusim
+ """
+ return evaluation.sample_expected_sim(
+ self,
+ clustering,
+ measure=measure,
+ random_model=random_model,
+ n_samples=n_samples,
+ keep_samples=keep_samples,
+ )
diff --git a/cdlib/classes/temporal_clustering.py b/cdlib/classes/temporal_clustering.py
index 61b2c7b7..09238f68 100644
--- a/cdlib/classes/temporal_clustering.py
+++ b/cdlib/classes/temporal_clustering.py
@@ -150,107 +150,3 @@ def get_explicit_community_match(self) -> list:
cid is the position of the community within the Clustering object.
"""
return self.matching
-
- def community_matching(
- self, method: Callable[[set, set], float], two_sided: bool = False
- ) -> list:
- """
- Reconstruct community matches across adjacent observations using a provided similarity function.
-
- :param method: a set similarity function with co-domain in [0,1] (e.g., Jaccard)
- :param two_sided: boolean.
- Whether the match has to be applied only from the past to the future (False, default)
- or even from the future to the past (True)
- :return: a list of tuples [(Ti_Ca, Tj_Cb, score), ... ].
- Community names are assigned following the pattern {tid}_{cid}, where tid is the time of observation and
- cid is the position of the community within the Clustering object.
- """
-
- if self.matching is not None:
- return self.matching
-
- lifecycle = []
-
- for i in range(self.current_observation - 1):
- c_i = self.clusterings[i]
- c_j = self.clusterings[i + 1]
- for name_i, com_i in c_i.named_communities.items():
-
- # name_i = f"{self.obs_to_time[i]}_{cid_i}"
- best_match = []
- best_score = 0
-
- for name_j, com_j in c_j.named_communities.items():
- # name_j = f"{self.obs_to_time[i+1]}_{cid_j}"
-
- match = method(com_i, com_j)
- if match > best_score:
- best_match = [name_j]
- best_score = match
- elif match == best_score:
- best_match.append(name_j)
-
- for j in best_match:
- lifecycle.append((name_i, j, best_score))
-
- if two_sided:
-
- for i in range(self.current_observation - 1, 0, -1):
- c_i = self.clusterings[i]
- c_j = self.clusterings[i - 1]
-
- for name_i, com_i in c_i.named_communities.items():
- # name_i = f"{self.obs_to_time[i]}_{cid_i}"
- best_match = []
- best_score = 0
-
- for name_j, com_j in c_j.named_communities.items():
- # name_j = f"{self.obs_to_time[i-1]}_{cid_j}"
-
- match = method(com_i, com_j)
- if match > best_score:
- best_match = [name_j]
- best_score = match
- elif match == best_score:
- best_match.append(name_j)
-
- for j in best_match:
- lifecycle.append((j, name_i, best_score))
-
- self.matched = lifecycle
-
- return lifecycle
-
- def lifecycle_polytree(
- self, method: Callable[[set, set], float] = None, two_sided: bool = False
- ) -> nx.DiGraph:
- """
- Reconstruct the poly-tree representing communities lifecycles using a provided similarity function.
-
- :param method: a set similarity function with co-domain in [0,1] (e.g., Jaccard)
- :param two_sided: boolean.
- Whether the match has to be applied only from the past to the future (False, default)
- or even from the future to the past (True)
- :return: a networkx DiGraph object.
- Nodes represent communities, their ids are assigned following the pattern {tid}_{cid},
- where tid is the time of observation and
- cid is the position of the community within the Clustering object.
- """
-
- if self.matching is not None:
- lifecycle = self.matching
- else:
- if method is None:
- raise ValueError("method parameter not specified")
- lifecycle = self.community_matching(method, two_sided)
-
- pt = nx.DiGraph()
- if len(lifecycle[0]) == 3:
- for u, v, w in lifecycle:
- pt.add_edge(u, v, weight=w)
- else:
- # implicit matching
- for u, v in lifecycle:
- pt.add_edge(u, v)
-
- return pt
diff --git a/cdlib/lifecycles/__init__.py b/cdlib/lifecycles/__init__.py
new file mode 100644
index 00000000..9f8aea09
--- /dev/null
+++ b/cdlib/lifecycles/__init__.py
@@ -0,0 +1,2 @@
+from .classes import LifeCycle, CommunityEvent
+from .algorithms import *
diff --git a/cdlib/lifecycles/algorithms/__init__.py b/cdlib/lifecycles/algorithms/__init__.py
new file mode 100644
index 00000000..d9d5616a
--- /dev/null
+++ b/cdlib/lifecycles/algorithms/__init__.py
@@ -0,0 +1,4 @@
+from .event_analysis import *
+from .classic_match import *
+from .measures import *
+from .null_model import *
diff --git a/cdlib/lifecycles/algorithms/classic_match.py b/cdlib/lifecycles/algorithms/classic_match.py
new file mode 100644
index 00000000..d61f8796
--- /dev/null
+++ b/cdlib/lifecycles/algorithms/classic_match.py
@@ -0,0 +1,271 @@
+from itertools import combinations
+
+__all__ = ["events_asur", "event_graph_greene"]
+
+
+def _asur_merge_score(t: set, R: list) -> float:
+ """
+    Compute the Asur merge score.
+
+    It is defined as the size of the intersection of the target set with the union of the reference
+    sets, divided by the size of the larger of the two (the target or the union of the reference sets).
+
+ :param t: target set
+ :param R: list of reference sets
+ :return: Merge score
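+
+    :Example:
+
+    A toy check on hand-picked sets: {1, 2} is the overlap, and the larger of the two sets has size 3:
+
+    >>> _asur_merge_score({1, 2, 3}, [{1, 2}, {4}])
+    0.6666666666666666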
+ """
+ union_reference = set.union(*R)
+ nodes = union_reference.intersection(t)
+ res = len(nodes) / len(max([union_reference, t], key=len))
+
+ return res
+
+
+def _greene_merge_score(t: set, R: set) -> float:
+ """
+    Compute the Greene merge score, based on the Jaccard index.
+
+ :param t: target set
+ :param R: reference set
+ :return: Merge score
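+
+    :Example:
+
+    A toy check: the Jaccard index of {1, 2, 3} and {2, 3, 4} is 2/4:
+
+    >>> _greene_merge_score({1, 2, 3}, {2, 3, 4})
+    0.5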
+ """
+
+ return len(t.intersection(R)) / len(t.union(R))
+
+
+def _find_asur_merge_events(lc: object, th: float) -> tuple:
+ """
+ Find Merge events in a lifecycle according to Asur et al.
+
+ :param lc: the lifecycle object
+ :param th: cluster integrity threshold
+    :return: a tuple (events, flows) of Merge events and the associated node flows
+ """
+ events = []
+ flows = []
+ for t in lc.temporal_ids()[1:]: # start from the second time step
+ for set_name in lc.get_partition_at(t):
+ target = lc.get_group(set_name)
+ flow = lc.group_flow(set_name, "-")
+ r_names = list(flow.keys()) # names of the reference sets
+ # compute for all pair of reference sets (combinations)
+ for r1, r2 in combinations(r_names, 2):
+ merge_score = _asur_merge_score(
+ target,
+ [lc.get_group(r1), lc.get_group(r2)],
+ )
+
+ if merge_score > th:
+ events.append(
+ {
+ "src": set_name,
+ "type": "Merge",
+ "score": merge_score,
+ "ref_sets": [r1, r2], # names of the reference sets
+ }
+ )
+
+ flows.append(
+ {
+ "src": set_name,
+ "type": "Merge",
+ "target": r1,
+ "flow": lc.get_group(r1).intersection(
+ lc.get_group(set_name)
+ ),
+ }
+ )
+ flows.append(
+ {
+ "src": set_name,
+ "type": "Merge",
+ "target": r2,
+ "flow": lc.get_group(r2).intersection(
+ lc.get_group(set_name)
+ ),
+ }
+ )
+
+ return events, flows
+
+
+def _find_asur_split_events(lc: object, th: float) -> tuple:
+ """
+    Find Split events in a lifecycle according to Asur et al.
+
+ :param lc: the lifecycle object
+ :param th: cluster integrity threshold
+    :return: a tuple (events, flows) of Split events and the associated node flows
+ """
+ events, flows = [], []
+    for t in lc.temporal_ids():  # forward flows are empty at the last time step
+ for set_name in lc.get_partition_at(t):
+ target = lc.get_group(set_name)
+ flow = lc.group_flow(set_name, "+")
+ r_names = list(flow.keys()) # names of the reference sets
+ # compute for all pair of reference sets (combinations)
+ for r1, r2 in combinations(r_names, 2):
+ merge_score = _asur_merge_score(
+ target, [lc.get_group(r1), lc.get_group(r2)]
+ )
+
+ if merge_score > th:
+ events.append(
+ {
+ "src": set_name,
+ "type": "Split",
+ "score": merge_score,
+ "ref_sets": [r1, r2], # names of the reference sets
+ }
+ )
+
+ flows.append(
+ {
+ "src": set_name,
+                            "type": "Split",
+ "target": r1,
+ "flow": lc.get_group(r1).intersection(
+ lc.get_group(set_name)
+ ),
+ }
+ )
+ flows.append(
+ {
+ "src": set_name,
+                            "type": "Split",
+ "target": r2,
+ "flow": lc.get_group(r2).intersection(
+ lc.get_group(set_name)
+ ),
+ }
+ )
+
+ return events, flows
+
+
+def _find_asur_birth_events(lc: object) -> list:
+ """
+    Find Birth events in a lifecycle according to Asur et al.
+
+    :param lc: the lifecycle object
+    :return: list of Birth events
+ """
+ events = []
+ for t in lc.temporal_ids()[1:]: # start from the second time step
+ for set_name in lc.get_partition_at(t):
+ flow = lc.group_flow(set_name, "-")
+ r_names = list(flow.keys()) # names of the reference sets
+ if len(r_names) == 0:
+ events.append({"src": set_name, "type": "Birth"})
+ return events
+
+
+def _find_asur_death_events(lc: object) -> list:
+ """
+    Find Death events in a lifecycle according to Asur et al.
+
+    :param lc: the lifecycle object
+    :return: list of Death events
+ """
+ events = []
+    for t in lc.temporal_ids()[0:-1]:  # skip the last time step (no forward flow)
+ for set_name in lc.get_partition_at(t):
+ flow = lc.group_flow(set_name, "+")
+ r_names = list(flow.keys()) # names of the reference sets
+ if len(r_names) == 0:
+ events.append({"src": set_name, "type": "Death"})
+ return events
+
+
+def _find_asur_continue_events(lc: object) -> list:
+ """
+    Find Continuation events in a lifecycle according to Asur et al.
+
+    :param lc: the lifecycle object
+    :return: list of Continuation events
+ """
+ events = []
+    for t in lc.temporal_ids()[0:-1]:  # skip the last time step (no forward flow)
+ for set_name in lc.get_partition_at(t):
+ flow = lc.group_flow(set_name, "+")
+
+ r_names = list(flow.keys()) # names of the reference sets
+ for name in r_names:
+ if lc.get_group(name) == lc.get_group(set_name):
+ events.append(
+ {
+ "src": set_name,
+ "type": "Continuation",
+ "ref_set": name,
+ }
+ )
+ return events
+
+
+def events_asur(lc: object, th: float = 0.5) -> tuple:
+ """
+ Compute the events in a lifecycle according to Asur et al.
+    Return a pair of dictionaries of the form {event_type: [event1, event2, ...]}: one for events, one for node flows.
+
+ :param lc: the lifecycle object
+ :param th: threshold for Merge and Split scores. Defaults to 0.5.
+    :return: a tuple (events, flows)
+
+ :Reference:
+ Asur, S., Parthasarathy, S., Ucar, D.:
+ An event-based framework for characterizing the evolutionary behavior of interaction graphs.
+ ACM Transactions on Knowledge Discovery from Data (TKDD) 3(4), 1–36 (2009)
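+
+    :Example:
+
+    A minimal sketch; it assumes ``lc`` is an already populated lifecycle object
+    (e.g., the community matching built internally by ``LifeCycle``):
+
+    >>> events, flows = events_asur(lc, th=0.5)
+    >>> # events maps 'Merge', 'Split', 'Birth', 'Death', 'Continuation' to event lists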
+ """
+ merge_evts, merge_flows = _find_asur_merge_events(lc, th)
+ split_evts, split_flows = _find_asur_split_events(lc, th)
+
+ events = {
+ "Merge": merge_evts,
+ "Split": split_evts,
+ "Birth": _find_asur_birth_events(lc),
+ "Death": _find_asur_death_events(lc),
+ "Continuation": _find_asur_continue_events(lc),
+ }
+
+ flows = {
+ "Merge": merge_flows,
+ "Split": split_flows,
+ }
+
+ return events, flows
+
+
+def event_graph_greene(lc: object, th: float = 0.1) -> tuple:
+ """
+ Compute the event graph in a lifecycle according to Greene et al.
+    Return the matches between groups, i.e., the edges of the event graph, along with the associated node flows.
+
+ :param lc: the lifecycle object
+ :param th: threshold for the Jaccard index. Defaults to 0.1 according to best results in the original paper.
+    :return: a tuple (events, flows) describing the matches between groups
+
+ :Reference:
+ Greene, D., Doyle, D., Cunningham, P.: Tracking the evolution of communities in dynamic social networks.
+ In: Proceedings of the 2010 International Conference on Advances in Social Networks Analysis and Mining
+ (ASONAM 2010), pp. 176–183. IEEE (2010)
+
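+    :Example:
+
+    A minimal sketch; ``lc`` is assumed to be an already populated lifecycle object:
+
+    >>> events, flows = event_graph_greene(lc, th=0.1)
+    >>> # events["Merge"] is a list of {"src": ..., "type": "Merge", "ref_set": ...} records
+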
+ """
+ events = []
+ flows = []
+ for t in lc.temporal_ids()[0:-1]:
+ for set_name in lc.get_partition_at(t):
+ target = lc.get_group(set_name)
+ flow = lc.group_flow(set_name, "+")
+ r_names = list(flow.keys()) # names of the reference sets
+            # score each reference set against the target
+ for r in r_names:
+ merge_score = _greene_merge_score(target, lc.get_group(r))
+ if merge_score > th:
+ events.append({"src": set_name, "type": "Merge", "ref_set": r})
+ flows.append(
+ {"src": set_name, "type": "Merge", "target": r, "flow": flow[r]}
+ )
+
+ return {"Merge": events}, {"Merge": flows}
diff --git a/cdlib/lifecycles/algorithms/event_analysis.py b/cdlib/lifecycles/algorithms/event_analysis.py
new file mode 100644
index 00000000..5f865039
--- /dev/null
+++ b/cdlib/lifecycles/algorithms/event_analysis.py
@@ -0,0 +1,225 @@
+from cdlib.lifecycles.classes.matching import CommunityMatching
+from cdlib.lifecycles.algorithms.measures import *
+from cdlib.lifecycles.utils import *
+
+__all__ = [
+ "analyze_all_flows",
+ "analyze_flow",
+ "events_all",
+ "facets",
+ "event_weights",
+ "event",
+ "event_weights_from_flow",
+]
+
+
+def _analyze_one_struct(target, reference) -> dict:
+ # nb reference sets here are already filtered by minimum branch size
+
+ ids_for_entropy = []
+ # els_in_branches = set()
+ for i, r in enumerate(reference):
+ branch = target.intersection(r)
+ ids_for_entropy.extend([str(i)] * len(branch))
+ # els_in_branches.update(branch)
+ # newels_ids = [str(j+len(reference)) for j in range(len(target.difference(els_in_branches)))]
+ # ids_for_entropy.extend(newels_ids)
+
+ return {
+ "Unicity": facet_unicity(ids_for_entropy),
+ "Identity": facet_identity(target, reference),
+ "Outflow": facet_outflow(target, reference),
+ "size": len(target),
+ }
+
+
+def _analyze_one_attr(target, reference, attr) -> dict:
+ mca, pur = purity(target)
+ try:
+ ent = _normalized_shannon_entropy(target, base=2)
+ except ZeroDivisionError:
+ ent = 0
+
+ return {
+ f"{attr}_H": ent,
+ f"{attr}_H_change": facet_metadata(target, reference, base=2),
+ f"{attr}_purity": pur,
+ f"{attr}_mca": mca,
+ }
+
+
+def event_weights_from_flow(analyzed_flows: dict, direction: str) -> dict:
+ """
+ Compute the event weights of the analyzed flows.
+
+ :param analyzed_flows: the result of the analysis of a flow
+ :param direction: the temporal direction in which the flow was analyzed
+ :return: a dictionary containing the event weights
+ """
+ if direction not in ["+", "-"]:
+ raise ValueError(f"direction must be either '+' or '-'")
+ res = {}
+ names = backward_event_names() if direction == "-" else forward_event_names()
+ for id_, analyzed_flow in analyzed_flows.items():
+ scores = _compute_event_scores(analyzed_flow)
+ res[id_] = dict(zip(names, scores))
+
+ return res
+
+
+def _compute_event_scores(analyzed_flow: dict) -> list:
+ return [
+ (analyzed_flow["Unicity"])
+ * (1 - analyzed_flow["Identity"])
+ * analyzed_flow["Outflow"],
+ (1 - analyzed_flow["Unicity"])
+ * (1 - analyzed_flow["Identity"])
+ * analyzed_flow["Outflow"],
+ (analyzed_flow["Unicity"])
+ * analyzed_flow["Identity"]
+ * analyzed_flow["Outflow"],
+ (1 - analyzed_flow["Unicity"])
+ * analyzed_flow["Identity"]
+ * analyzed_flow["Outflow"],
+ (analyzed_flow["Unicity"])
+ * analyzed_flow["Identity"]
+ * (1 - analyzed_flow["Outflow"]),
+ (1 - analyzed_flow["Unicity"])
+ * analyzed_flow["Identity"]
+ * (1 - analyzed_flow["Outflow"]),
+ (analyzed_flow["Unicity"])
+ * (1 - analyzed_flow["Identity"])
+ * (1 - analyzed_flow["Outflow"]),
+ (1 - analyzed_flow["Unicity"])
+ * (1 - analyzed_flow["Identity"])
+ * (1 - analyzed_flow["Outflow"]),
+ ]
+
+
+def events_all(lc: CommunityMatching, direction=None) -> dict:
+ """
+ Compute all events for a lifecycle object.
+
+ :param lc: a LifeCycle object
+ :param direction: the temporal direction in which the events are to be computed
+
+ :return: a dictionary containing the events
+
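+    :Example:
+
+    A minimal sketch; ``lc`` is assumed to be an already populated CommunityMatching:
+
+    >>> evts = events_all(lc)  # {'+': {...}, '-': {...}}, one entry per temporal direction
+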
+ """
+ if direction is None:
+ direction = ["+", "-"]
+ res = {}
+ for d in direction:
+ analyzed_flows = analyze_all_flows(lc, d)
+ res[d] = event_weights_from_flow(analyzed_flows, d)
+ return res
+
+
+def analyze_all_flows(
+ lc: CommunityMatching, direction: str, min_branch_size: int = 1, attr=None
+) -> dict:
+ """
+    Analyze the flow of all sets in a LifeCycle object w.r.t. a given temporal direction.
+    See analyze_flow for more details.
+
+    :param lc: a LifeCycle object
+    :param direction: the temporal direction in which the sets are to be analyzed
+    :param min_branch_size: the minimum number of elements that a branch must contain to be considered
+    :param attr: the name or list of names of the attribute(s) to analyze. If None, no attribute is analyzed
+    :return: a dictionary keyed by set name and valued by the corresponding flow analysis
+ """
+ last_id = lc.temporal_ids()[-1] if direction == "+" else lc.temporal_ids()[0]
+ return {
+ name: analyze_flow(
+ lc, name, direction, min_branch_size=min_branch_size, attr=attr
+ )
+ for name in lc.named_sets
+        if name.split("_")[0] != str(last_id)
+ }
+
+
+def analyze_flow(
+ lc: CommunityMatching,
+ target: str,
+ direction: str,
+ min_branch_size=1,
+ attr: str = None,
+) -> dict:
+ """
+ Analyze the flow of a set with respect to a given temporal direction.
+    Specifically, compute the unicity, identity, and outflow facets of the flow, plus the target set size.
+    If one or more attributes are specified via the attr parameter, also compute the entropy of the attribute values,
+    the entropy change, the purity and the most common attribute value.
+    If min_branch_size is specified, all branches of the flow that include fewer than min_branch_size elements are
+    discarded.
+
+    :param lc: a LifeCycle object
+ :param target: the name of the set to analyze
+ :param direction: the temporal direction in which the set is to be analyzed
+ :param min_branch_size: the minimum number of elements that a branch must contain to be considered
+ :param attr: the name or list of names of the attribute(s) to analyze. If None, no attribute is analyzed
+ :return: a dictionary containing the analysis results
+ """
+
+ flow = lc.group_flow(target, direction=direction, min_branch_size=min_branch_size)
+
+ reference_sets = [lc.get_group(name) for name in flow]
+ analysis = _analyze_one_struct(lc.get_group(target), reference_sets)
+
+ if attr is not None:
+ attrs_to_analyze = [attr] if isinstance(attr, str) else attr
+ for a in attrs_to_analyze:
+ target_attrs = get_group_attribute_values(lc, target, a)
+ reference_attrs = [get_group_attribute_values(lc, name, a) for name in flow]
+ analysis.update(_analyze_one_attr(target_attrs, reference_attrs, a))
+ return analysis
+
+
+def facets(lc: CommunityMatching, target: str, direction: str) -> dict:
+ """
+ Compute the unicity, identity, and outflow facets of a target set in a lifecycle object.
+ Also compute the size of the target set.
+
+ :param lc: a LifeCycle object
+ :param target: the name of the target set
+ :param direction: the temporal direction in which the flow is to be analyzed
+ :return: a dictionary containing the facets
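+
+    :Example:
+
+    A minimal sketch; ``lc`` is assumed to be an already populated CommunityMatching,
+    and "0_2" a community id following the {tid}_{cid} pattern:
+
+    >>> f = facets(lc, "0_2", "+")  # keys: 'Unicity', 'Identity', 'Outflow', 'size'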
+ """
+ flow = lc.group_flow(target, direction=direction)
+
+ reference_sets = [lc.get_group(name) for name in flow]
+ facets_ = _analyze_one_struct(lc.get_group(target), reference_sets)
+ return facets_
+
+
+def event_weights(lc: CommunityMatching, target: str, direction: str) -> dict:
+ """
+ Compute the event weights of a target set in a lifecycle object.
+
+ :param lc: a LifeCycle object
+ :param target: the name of the target set
+ :param direction: the temporal direction in which the flow is to be analyzed
+ :return: a dictionary containing the event weights
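+
+    :Example:
+
+    A minimal sketch; ``lc`` is assumed to be an already populated CommunityMatching:
+
+    >>> w = event_weights(lc, "0_2", "+")  # maps each forward event name to its weight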
+ """
+ names = backward_event_names() if direction == "-" else forward_event_names()
+ fscores = facets(lc, target, direction)
+ res = _compute_event_scores(fscores)
+ return dict(zip(names, res))
+
+
+def event(lc, target, direction=None):
+ """
+ Compute the event type and typicality of a target set in a lifecycle.
+
+ :param lc: lifecycle object
+ :param target: name of the target set
+ :param direction: temporal direction in which the flow is to be analyzed
+ :return: a dictionary containing the event type and scores
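+
+    :Example:
+
+    A minimal sketch; ``lc`` is assumed to be an already populated CommunityMatching:
+
+    >>> ev = event(lc, "0_2")  # {'+': (event_name, typicality), '-': (event_name, typicality)}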
+ """
+ if direction is None:
+ direction = ["+", "-"]
+ back = {}
+ forward = {}
+ if "-" in direction:
+ back = event_typicality(event_weights(lc, target, "-"))
+ if "+" in direction:
+ forward = event_typicality(event_weights(lc, target, "+"))
+ return {"+": forward, "-": back}
diff --git a/cdlib/lifecycles/algorithms/measures.py b/cdlib/lifecycles/algorithms/measures.py
new file mode 100644
index 00000000..e2aec15a
--- /dev/null
+++ b/cdlib/lifecycles/algorithms/measures.py
@@ -0,0 +1,214 @@
+from collections import Counter
+from math import log, e
+from typing import Union, Tuple
+
+import numpy as np
+import cdlib.lifecycles.algorithms.event_analysis as ea
+
+__all__ = [
+ "_normalized_shannon_entropy",
+ "facet_unicity",
+ "facet_identity",
+ "facet_outflow",
+ "facet_metadata",
+ "purity",
+ "event_typicality",
+ "stability",
+]
+
+
+def _entropy(labels: list, base=2) -> float:
+ """
+ computes the Shannon entropy of a list of labels
+
+ :param labels: the list of labels
+ :param base: the base of the logarithm
+ :return: the set entropy
+ """
+ n = len(labels)
+ counter = Counter(labels)
+ probabilities = [count / n for count in counter.values()]
+
+ return -sum(p * log(p, base) for p in probabilities)
+
+
+def _normalized_shannon_entropy(labels, base=2):
+ """
+ the normalized Shannon entropy is the Shannon entropy divided by the maximum possible entropy
+ (logb(n) where n is the number of labels)
+
+ :param labels: the list of labels
+ :param base: the base of the logarithm
+ :return: the normalized Shannon entropy
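+
+    :Example:
+
+    A toy check: two equally frequent labels reach the maximum entropy:
+
+    >>> _normalized_shannon_entropy(["a", "a", "b", "b"], base=2)
+    1.0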
+ """
+
+ # Example of problem: 40,40,1 compared with 40,40
+
+ base = e if base is None else base
+
+ ent = _entropy(labels, base)
+ max_ent = log(len(list(set(labels))), base)
+ # print(ent, max_ent, labels)
+
+ normalized_entropy = ent / max_ent
+ return normalized_entropy
+
+
+def _max_second_difference(labels):
+ """
+ Function computing the difference between the most frequent attribute value and the
+ second most frequent attribute value
+
+ Args:
+ labels (_type_): the list of labels
+
+ Returns:
+ _type_: _description_
+ """
+ if len(set(labels)) < 2:
+ return 1
+ n = len(labels)
+ counter = Counter(labels)
+ probabilities = [count / n for count in counter.values()]
+ max_val = max(probabilities)
+ second_largest = sorted(probabilities)[-2]
+ return max_val - second_largest
+
+
+def facet_unicity(labels: list) -> float:
+ """
+ the unicity facet quantifies the extent to which a target set comes from one (=1) or multiple (->0) flows.
+ It is computed as the difference between the largest and the second largest group size
+ If the target set is composed of a single group, the unicity facet is 1
+
+ :param labels: the list of group labels
+ :return: the unicity facet
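+
+    :Example:
+
+    A toy check: three labels out of four come from the same flow, so the gap is 0.75 - 0.25:
+
+    >>> facet_unicity(["a", "a", "a", "b"])
+    0.5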
+ """
+
+ if len(set(labels)) < 2:
+ return 1
+ else:
+ # return gini_index(labels)
+ # return normalized_shannon_entropy(labels)
+ # return berger_parker_index(labels)
+ return _max_second_difference(labels)
+
+
+def facet_identity(target: set, reference: list) -> float:
+ """
+    the identity facet quantifies how much the identity of the target set is shared with the reference groups.
+
+ :param target: the target set
+ :param reference: the reference sets
+    :return: the identity facet
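+
+    :Example:
+
+    A toy check: a target fully covered by a single reference group has identity 1:
+
+    >>> facet_identity({1, 2}, [{1, 2}])
+    1.0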
+ """
+ w = 0
+ persistent = 0
+ for r in reference:
+ flow = r.intersection(target)
+ w += len(flow) * len(flow) / len(r)
+ # print(len(flow),len(r),len(target),w)
+ persistent += len(flow)
+ # denominator=len(target)
+ if persistent == 0:
+ return 0.0
+ denominator = persistent
+ w = w / denominator
+ return w
+
+
+def facet_outflow(target: set, reference: list) -> float:
+ """
+    the outflow facet is the fraction of elements of the target set
+    that do not appear in any of the reference sets
+
+ :param target: the target set
+ :param reference: the reference sets
+    :return: the outflow facet
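+
+    :Example:
+
+    A toy check: one of four target elements appears in no reference set:
+
+    >>> facet_outflow({1, 2, 3, 4}, [{1, 2}, {3}])
+    0.25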
+ """
+ try:
+ return len(target.difference(set.union(*reference))) / len(target)
+ except TypeError: # if reference is empty
+ return 1.0
+
+
+def facet_metadata(
+ target_labels: list, reference_labels: list, base: int = None
+) -> Union[float, None]:
+ """
+ compute the change in attribute entropy between a target set and a reference set
+
+ :param target_labels: the labels of the target set
+ :param reference_labels: the labels of the reference sets (a list of lists)
+ :param base: the base of the logarithm
+ :return: the change in attribute entropy
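+
+    :Example:
+
+    A toy check: identical label balance on both sides yields no entropy change:
+
+    >>> facet_metadata(["a", "a", "b", "b"], [["a", "b"]], base=2)
+    0.0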
+ """
+ base = e if base is None else base
+ try:
+ target_entropy = _normalized_shannon_entropy(target_labels, base)
+ except ZeroDivisionError:
+ target_entropy = 0
+
+ reference_entropy = 0
+ if len(reference_labels) > 0:
+ for labels in reference_labels:
+ try:
+ reference_entropy += _normalized_shannon_entropy(labels, base)
+ except ZeroDivisionError:
+ continue
+
+ reference_entropy /= len(reference_labels)
+ else:
+ return None
+ return target_entropy - reference_entropy
+
+
+def stability(lc: object, direction: str) -> float:
+ """
+ compute the temporal partition stability.
+    The stability is the average of the Continue event scores.
+
+ :param lc: the lifecycle object
+ :param direction: the temporal direction
+ :return: the stability score
+
+ """
+ events = ea.events_all(lc)
+
+ res = 0
+ if len(events[direction]) == 0:
+ return 0
+ for group, event in events[direction].items():
+ res += event["Continue"]
+ return res / len(events[direction])
+
+
+def purity(labels: list) -> Tuple[str, float]:
+ """
+ compute the purity of a set of labels. Purity is defined as the relative frequency
+ of the most frequent attribute value
+
+ :param labels: the list of labels
+ :return: a tuple of the most frequent attribute value and its frequency
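+
+    :Example:
+
+    A toy check: the most frequent label and its relative frequency:
+
+    >>> purity(["x", "x", "y"])
+    ('x', 0.6666666666666666)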
+ """
+ most_common_attribute, freq = Counter(labels).most_common(1)[0]
+ return most_common_attribute, freq / len(labels)
+
+
+def event_typicality(event_scores: dict) -> Tuple[str, float]:
+ """
+ compute the event's name and its typicality score.
+    The typicality score is the highest score among all event scores.
+
+ :param event_scores: a dictionary keyed by event name and valued by the event score
+ :return: a tuple of the event name and its typicality score
+
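+    :Example:
+
+    A toy check: the event with the highest score wins:
+
+    >>> event_typicality({"Birth": 0.2, "Merge": 0.7})
+    ('Merge', 0.7)
+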
+ """
+ highest_score = 0
+ event = ""
+ for ev, score in event_scores.items():
+ if score > highest_score:
+ highest_score = score
+ event = ev
+ return event, highest_score
diff --git a/cdlib/lifecycles/algorithms/null_model.py b/cdlib/lifecycles/algorithms/null_model.py
new file mode 100644
index 00000000..6975048e
--- /dev/null
+++ b/cdlib/lifecycles/algorithms/null_model.py
@@ -0,0 +1,169 @@
+import random
+from collections import Counter, defaultdict
+from statistics import mean, stdev
+
+import scipy.stats as stats
+
+__all__ = ["flow_null", "all_flows_null"]
+
+
+def _generate_random_branch(reference, size):
+ """
+ Generate a random branch of a given size by sampling elements from the reference partition.
+ """
+ elems = list()
+ for subset in reference:
+ elems.extend(subset)
+ return random.sample(elems, size)
+
+
+def _null_model(branch, reference, iterations):
+ """
+ Generate a null model for a given branch by generating num_permutations random branches of the same size and
+ computing the mean and standard deviation of the frequency of each element in the reference partition.
+ """
+ null_branch = defaultdict(list)
+ for _ in range(iterations):
+ random_branch = _generate_random_branch(reference, len(branch))
+ count = Counter(random_branch)
+ for name, frequency in count.items():
+ null_branch[name].append(frequency)
+
+ avg_null_branch = defaultdict(dict)
+ for name, frequencies in null_branch.items():
+ if len(frequencies) == 1:
+ avg_null_branch[name]["mean"] = frequencies[0]
+ avg_null_branch[name]["std"] = 0
+ else:
+ avg_null_branch[name]["mean"] = mean(frequencies)
+ avg_null_branch[name]["std"] = stdev(frequencies)
+
+ return dict(avg_null_branch)
+
+
+def _p_value(size, null_model):
+ """
+ Compute the p-value of a branch given a null model via z-score
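+
+    :Example:
+
+    A toy check: a branch exactly matching the null mean sits at z = 0, i.e. p = 0.5:
+
+    >>> p = _p_value(10, {"mean": 10, "std": 1})  # p == 0.5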
+ """
+
+ z = (size - null_model["mean"]) / (
+ null_model["std"] + 1e-6
+ ) # 1e-6 to avoid division by zero
+ p = stats.norm.sf(abs(z))
+
+ return p
+
+
+def flow_null(
+ lc: object,
+ target: str,
+ direction: str,
+ min_branch_size: int = 1,
+ iterations: int = 1000,
+) -> dict:
+ """
+ Compare the flow with a null model. Each branch of each flow is compared with a null branch of the same size.
+ The null model is generated by randomly sampling elements from the reference partition *iterations* times.
+ The mean and standard deviation of the null model are used to compute a z-score
+ for each branch, which is then used to compute a p-value.
+
+ :param lc: a CommunityMatching object
+ :param target: target set identifier
+ :param direction: temporal direction
+ :param min_branch_size: minimum size of a branch to be considered
+ :param iterations: number of random draws to be used to generate the null model
+ :return: a dictionary keyed by set identifier and valued by mean, std, and p-value
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from cdlib.lifecycles.algorithms import flow_null
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> validated = flow_null(events, "0_2", "+")
+
+ """
+
+ flow = lc.group_flow(target, direction, min_branch_size)
+ tid = int(target.split("_")[0])
+ if direction == "+":
+ tid += 1
+ elif direction == "-":
+ tid -= 1
+ else:
+ raise ValueError(f"Invalid direction: {direction}")
+ # convert to list of ids lists
+ reference = [[id_] * len(lc.get_group(id_)) for id_ in lc.get_partition_at(tid)]
+
+ validated = dict()
+ for name, subset in flow.items():
+ null_model = _null_model(subset, reference, iterations)[name]
+        # null mean, null std, p-value
+ validated[name] = {
+ "mean": null_model["mean"],
+ "std": null_model["std"],
+ "p-value": _p_value(len(subset), null_model),
+ }
+ return validated
+
+
+def all_flows_null(
+ lc: object,
+ direction: str,
+ min_branch_size=1,
+ iterations=1000,
+):
+ """
+    Compare all flows with null models. See flow_null for details.
+
+    :param lc: a CommunityMatching object
+ :param direction: temporal direction
+ :param min_branch_size: minimum size of a branch to be considered
+ :param iterations: number of random draws to be used to generate the null model
+ :return: a dictionary keyed by set identifier and valued by mean, std, and p-value
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from cdlib.lifecycles.algorithms import all_flows_null
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> validated = all_flows_null(events, "+")
+
+ """
+ validated = dict()
+ for target, flow in lc.all_flows(direction, min_branch_size).items():
+ validated[target] = flow_null(
+ lc, target, direction, min_branch_size, iterations
+ )
+ return validated
diff --git a/cdlib/lifecycles/classes/__init__.py b/cdlib/lifecycles/classes/__init__.py
new file mode 100644
index 00000000..dc7347ea
--- /dev/null
+++ b/cdlib/lifecycles/classes/__init__.py
@@ -0,0 +1,2 @@
+from .event import *
+from .matching import *
diff --git a/cdlib/lifecycles/classes/event.py b/cdlib/lifecycles/classes/event.py
new file mode 100644
index 00000000..d93c61e5
--- /dev/null
+++ b/cdlib/lifecycles/classes/event.py
@@ -0,0 +1,959 @@
+from cdlib.classes import TemporalClustering
+from cdlib.lifecycles.classes.matching import CommunityMatching
+from cdlib.lifecycles.algorithms.null_model import flow_null, all_flows_null
+from cdlib.lifecycles.algorithms.event_analysis import (
+ events_all,
+ analyze_all_flows,
+ analyze_flow,
+)
+from cdlib.lifecycles.algorithms.classic_match import *
+import networkx as nx
+from collections import defaultdict
+from typing import Callable
+import json
+
+
+class CommunityEvent(object):
+ def __init__(self, com_id):
+ """
+ Constructor
+
+ :param com_id: community id
+ """
+
+ self.com_id = com_id
+ self.from_event = {}
+ self.to_event = {}
+ self.in_flow = {}
+ self.out_flow = {}
+
+ def set_from_event(self, from_event: dict):
+ """
+ Set the backward events of the community, i.e., those describing its past
+
+ :param from_event: a dictionary keyed by event name and valued by its score
+ """
+ self.from_event = {f: v for f, v in from_event.items() if v > 0}
+
+ def set_to_event(self, to_event: dict):
+ """
+ Set the forward events of the community, i.e., those describing its future
+
+ :param to_event: a dictionary keyed by event name and valued by its score
+ """
+ self.to_event = {t: v for t, v in to_event.items() if v > 0}
+
+ def set_in_flow(self, in_flow: dict):
+ """
+ Set the in flow of the community, i.e., the nodes received from communities of the previous observation
+
+ :param in_flow: a dictionary keyed by community id and valued by the set of shared nodes
+ """
+ self.in_flow = in_flow
+
+ def set_out_flow(self, out_flow: dict):
+ """
+ Set the out flow of the community, i.e., the nodes passed to communities of the following observation
+
+ :param out_flow: a dictionary keyed by community id and valued by the set of shared nodes
+ """
+ self.out_flow = out_flow
+
+ def get_from_event(self) -> dict:
+ """
+ Get the backward events of the community
+
+ :return: a dictionary keyed by event name and valued by its score
+ """
+ return self.from_event
+
+ def get_to_event(self) -> dict:
+ """
+ Get the forward events of the community
+
+ :return: a dictionary keyed by event name and valued by its score
+ """
+ return self.to_event
+
+ def get_in_flow(self) -> dict:
+ """
+ Get the in flow of the community
+
+ :return: a dictionary keyed by community id and valued by the set of shared nodes
+ """
+ return self.in_flow
+
+ def get_out_flow(self) -> dict:
+ """
+ Get the out flow of the community
+
+ :return: a dictionary keyed by community id and valued by the set of shared nodes
+ """
+ return self.out_flow
+
+ def to_json(self) -> dict:
+ """
+ Convert the event to json
+
+ :return: the event as json
+ """
+ res = {
+ "com_id": self.com_id,
+ "from_event": self.from_event,
+ "to_event": self.to_event,
+ "in_flow": {k: list(v) for k, v in self.in_flow.items()},
+ "out_flow": {k: list(v) for k, v in self.out_flow.items()},
+ }
+
+ return res
+
+
+class LifeCycle(object):
+ """
+ Class representing the lifecycle of a temporal clustering.
+ Starting from a TemporalClustering object, it allows computing the events that compose
+ the lifecycle (leveraging different definitions) and analyzing them.
+ """
+
+ def __init__(self, clustering: TemporalClustering = None):
+ """
+ Constructor
+
+ :param clustering: a TemporalClustering Object
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ """
+ self.clustering = clustering
+ self.events = {}
+ self.event_types = []
+ self.cm = CommunityMatching()
+ if clustering is not None:
+ self.cm.set_temporal_clustering(self.clustering)
+ self.algo = None
+
+ def compute_events_from_explicit_matching(self):
+ """
+ Compute the events of the lifecycle using the explicit matching (if available)
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> from dynetx import DynGraph
+ >>> dg = DynGraph()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> dg.add_interactions_from(g, t)
+ >>> tc = algorithms.tiles(dg, 10)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events_from_explicit_matching()
+ """
+ if not self.clustering.has_explicit_match():
+ raise ValueError("No explicit matching available")
+
+ lifecycle = self.clustering.get_explicit_community_match()
+
+ flows = {
+ "+": defaultdict(lambda: defaultdict(set)),
+ "-": defaultdict(lambda: defaultdict(set)),
+ }
+ events = {
+ "+": defaultdict(lambda: defaultdict(set)),
+ "-": defaultdict(lambda: defaultdict(set)),
+ }
+
+ for e in lifecycle:
+ xtid = int(e[0].split("_")[0])
+ ytid = int(e[1].split("_")[0])
+ if xtid < ytid:
+ flows["+"][e[0]][e[1]] = set(
+ self.clustering.get_community(e[0])
+ ).intersection(set(self.clustering.get_community(e[1])))
+ else:
+ flows["-"][e[0]][e[1]] = set(
+ self.clustering.get_community(e[0])
+ ).intersection(set(self.clustering.get_community(e[1])))
+
+ self.__instantiate_events(flows, events)
+
+ def compute_events_with_custom_matching(
+ self,
+ method: Callable[[set, set], float],
+ two_sided: bool = True,
+ threshold: float = 0.2,
+ ):
+ """
+ Compute the events of the lifecycle using a custom matching similarity function
+
+
+ :param method: a set similarity function with co-domain in [0,1] (e.g., Jaccard)
+ :param two_sided: boolean.
+ Whether the match has to be applied only from the past to the future (False)
+ or even from the future to the past (True, default)
+ :param threshold: the threshold above which two communities are considered matched
+
+ :Example:
+
+ >>> from cdlib import algorithms
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> tc = TemporalClustering()
+ >>> # build the temporal clustering object
+ >>> evts = LifeCycle(tc)
+ >>> jaccard = lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y))
+ >>> evts.compute_events_with_custom_matching(jaccard, two_sided=True, threshold=0.2)
+ """
+
+ self.event_types = ["Merge", "Split", "Continuation"]
+ self.algo = "custom"
+ lifecycle = []
+
+ for i in range(self.clustering.current_observation - 1):
+ c_i = self.clustering.clusterings[i]
+ c_j = self.clustering.clusterings[i + 1]
+ for name_i, com_i in c_i.named_communities.items():
+
+ best_match = []
+ best_score = 0
+
+ for name_j, com_j in c_j.named_communities.items():
+
+ match = method(com_i, com_j)
+ if match > best_score:
+ best_match = [name_j]
+ best_score = match
+ elif match == best_score:
+ best_match.append(name_j)
+
+ for j in best_match:
+ lifecycle.append((name_i, j, best_score))
+
+ if two_sided:
+
+ for i in range(self.clustering.current_observation - 1, 0, -1):
+ c_i = self.clustering.clusterings[i]
+ c_j = self.clustering.clusterings[i - 1]
+
+ for name_i, com_i in c_i.named_communities.items():
+ # name_i = f"{self.obs_to_time[i]}_{cid_i}"
+ best_match = []
+ best_score = 0
+
+ for name_j, com_j in c_j.named_communities.items():
+ # name_j = f"{self.obs_to_time[i-1]}_{cid_j}"
+
+ match = method(com_i, com_j)
+ if match > best_score:
+ best_match = [name_j]
+ best_score = match
+ elif match == best_score:
+ best_match.append(name_j)
+
+ for j in best_match:
+ lifecycle.append((j, name_i, best_score))
+
+ flows = {
+ "+": defaultdict(lambda: defaultdict(set)),
+ "-": defaultdict(lambda: defaultdict(set)),
+ }
+ events = {
+ "+": defaultdict(lambda: defaultdict(set)),
+ "-": defaultdict(lambda: defaultdict(set)),
+ }
+
+ for e in lifecycle:
+ xtid = int(e[0].split("_")[0])
+ ytid = int(e[1].split("_")[0])
+ if e[2] > threshold:
+ if xtid < ytid:
+ flows["+"][e[0]][e[1]] = set(
+ self.clustering.get_community(e[0])
+ ).intersection(set(self.clustering.get_community(e[1])))
+ else:
+ flows["-"][e[0]][e[1]] = set(
+ self.clustering.get_community(e[0])
+ ).intersection(set(self.clustering.get_community(e[1])))
+
+ self.__instantiate_events(flows, events)
+
+ def __instantiate_events(self, flows, events):
+ for e in flows["-"]:
+ if len(flows["-"][e].keys()) == 1:
+ events["-"][e] = {"Continuation": 1}
+ else:
+ events["-"][e] = {"Merge": 1}
+
+ for e in flows["+"]:
+ if len(flows["+"][e].keys()) == 1:
+ events["+"][e] = {"Continuation": 1}
+ else:
+ events["+"][e] = {"Split": 1}
+
+ for cid in flows["+"]:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_out_flow(flows["+"][cid])
+
+ for cid in flows["-"]:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_in_flow(flows["-"][cid])
+
+ from_events = events["-"]
+ to_events = events["+"]
+
+ for cid in from_events:
+ self.events[cid].set_from_event(
+ {k: v for k, v in from_events[cid].items() if v > 0}
+ )
+
+ for cid in to_events:
+ self.events[cid].set_to_event(
+ {k: v for k, v in to_events[cid].items() if v > 0}
+ )
+
+ def compute_events(
+ self,
+ matching_type: str = "facets",
+ matching_params: dict = {"min_branch_size": 1, "threshold": 0.5},
+ ):
+ """
+ Compute the events of the lifecycle
+
+ :param matching_type: the type of matching algorithm to use. Options are "facets", "asur", "greene".
+ :param matching_params: the parameters of the matching algorithm.
+ Defaults to {"min_branch_size": 1, "threshold": 0.5}.
+ The former parameter is used by "facets", the latter by "asur" and "greene".
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+
+ """
+
+ if matching_type == "facets":
+
+ self.algo = "facets"
+
+ self.event_types = [
+ "Birth",
+ "Accumulation",
+ "Growth",
+ "Expansion",
+ "Continuation",
+ "Merge",
+ "Offspring",
+ "Reorganization",
+ "Death",
+ "Dispersion",
+ "Shrink",
+ "Reduction",
+ "Continuation",
+ "Split",
+ "Ancestor",
+ "Disassemble",
+ ]
+
+ out_flows = self.cm.all_flows(
+ "+", min_branch_size=matching_params["min_branch_size"]
+ )
+
+ for cid in out_flows:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_out_flow(out_flows[cid])
+
+ in_flows = self.cm.all_flows(
+ "-", min_branch_size=matching_params["min_branch_size"]
+ )
+
+ for cid in in_flows:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_in_flow(in_flows[cid])
+
+ events = events_all(self.cm)
+ from_events = events["-"]
+ to_events = events["+"]
+
+ for cid in from_events:
+ self.events[cid].set_from_event(
+ {k: v for k, v in from_events[cid].items() if v > 0}
+ )
+
+ for cid in to_events:
+ self.events[cid].set_to_event(
+ {k: v for k, v in to_events[cid].items() if v > 0}
+ )
+
+ elif matching_type == "asur":
+
+ self.algo = "asur"
+
+ self.event_types = ["Merge", "Split", "Continuation", "Birth", "Death"]
+
+ events, flows = events_asur(self.cm, th=matching_params["threshold"])
+
+ c_to_evt = defaultdict(lambda: defaultdict(int))
+ c_from_evt = defaultdict(lambda: defaultdict(int))
+ for _, v in events.items():
+
+ for e in v:
+ src_tid = int(e["src"].split("_")[0])
+ if "ref_sets" in e:
+ trg_tid = int(e["ref_sets"][0].split("_")[0])
+ else:
+ trg_tid = int(e["ref_set"].split("_")[0])
+
+ if src_tid < trg_tid:
+ c_to_evt[e["src"]][e["type"]] += 1
+ else:
+ c_from_evt[e["src"]][e["type"]] += 1
+
+ c_from_evt = {
+ k: {k2: v2 / sum(v.values()) for k2, v2 in v.items() if v2 > 0}
+ for k, v in c_from_evt.items()
+ }
+ c_to_evt = {
+ k: {k2: v2 / sum(v.values()) for k2, v2 in v.items() if v2 > 0}
+ for k, v in c_to_evt.items()
+ }
+
+ c_from_flow = defaultdict(lambda: defaultdict(list))
+ c_to_flow = defaultdict(lambda: defaultdict(list))
+
+ for _, v in flows.items():
+ for e in v:
+ src_tid = int(e["src"].split("_")[0])
+ trg_tid = int(e["target"].split("_")[0])
+
+ if src_tid < trg_tid:
+ c_from_flow[e["src"]][e["target"]] = e["flow"]
+ else:
+ c_to_flow[e["src"]][e["target"]] = e["flow"]
+
+ for cid in c_to_flow:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_in_flow(c_to_flow[cid])
+
+ for cid in c_from_flow:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_out_flow(c_from_flow[cid])
+
+ for cid in c_to_evt:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_to_event(
+ {k: v for k, v in c_to_evt[cid].items() if v > 0}
+ )
+
+ for cid in c_from_evt:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_from_event(
+ {k: v for k, v in c_from_evt[cid].items() if v > 0}
+ )
+
+ elif matching_type == "greene":
+
+ self.algo = "greene"
+
+ self.event_types = ["Merge"]
+
+ events, flow = event_graph_greene(self.cm, th=matching_params["threshold"])
+ c_to_evt = defaultdict(lambda: defaultdict(int))
+ c_from_evt = defaultdict(lambda: defaultdict(int))
+ for _, v in events.items():
+
+ for e in v:
+ src_tid = int(e["src"].split("_")[0])
+ if "ref_sets" in e:
+ trg_tid = int(e["ref_sets"][0].split("_")[0])
+ else:
+ trg_tid = int(e["ref_set"].split("_")[0])
+
+ if src_tid < trg_tid:
+ c_to_evt[e["src"]][e["type"]] += 1
+ else:
+ c_from_evt[e["src"]][e["type"]] += 1
+
+ for cid in flow:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_in_flow(flow[cid])
+
+ for cid in c_to_evt:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_to_event(
+ {k: v for k, v in c_to_evt[cid].items() if v > 0}
+ )
+
+ for cid in c_from_evt:
+ if cid not in self.events:
+ self.events[cid] = CommunityEvent(cid)
+ self.events[cid].set_from_event(
+ {k: v for k, v in c_from_evt[cid].items() if v > 0}
+ )
+
+ else:
+ raise ValueError(f"Unknown matching type {matching_type}")
+
+ def get_event(self, com_id: str) -> CommunityEvent:
+ """
+ Get the events associated to a community
+
+ :param com_id: the community id
+ :return: the events associated to the community
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> evt = events.get_event("0_2")
+
+ """
+ return self.events.get(com_id)
+
+ def get_events(self) -> dict:
+ """
+ Get all the events
+
+ :return: the events
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> evts = events.get_events()
+ """
+ return self.events
+
+ def get_event_types(self) -> list:
+ """
+ Get the event types
+
+ :return: the event types
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> evts = events.get_event_types()
+ """
+ return self.event_types
+
+ def analyze_flows(
+ self, direction: str = "+", min_branch_size: int = 1, attr=None
+ ) -> dict:
+ """
+ Analyze the flows of the lifecycle
+
+ :param direction: the temporal direction in which the flows are to be analyzed. Options are "+" and "-".
+ :param min_branch_size: the minimum branch size
+ :param attr: the attribute to analyze
+ :return: the analyzed flows
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> c = events.analyze_flows("+")
+
+ """
+ if self.cm is not None:
+ return analyze_all_flows(self.cm, direction, min_branch_size, attr)
+ else:
+ raise ValueError("No temporal clustering set")
+
+ def analyze_flow(
+ self, com_id: str, direction: str = "+", min_branch_size: int = 1, attr=None
+ ) -> dict:
+ """
+ Analyze the flow of a community
+
+ :param com_id: the community id
+ :param direction: the temporal direction in which the flow is to be analyzed. Options are "+" and "-".
+ :param min_branch_size: the minimum branch size
+ :param attr: the attribute to analyze
+ :return: the analyzed flow
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ """
+ if self.cm is not None:
+ return analyze_flow(self.cm, com_id, direction, min_branch_size, attr)
+ else:
+ raise ValueError("No temporal clustering set")
+
+ def set_attribute(self, attr: dict, name: str):
+ """
+ Set the attributes of the lifecycle
+
+ :param attr: the attributes
+ :param name: the name of the attribute
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> import random
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>>
+ >>> def random_attributes():
+ >>> attrs = {}
+ >>> for i in range(250):
+ >>> attrs[i] = {}
+ >>> for t in range(10):
+ >>> attrs[i][t] = random.choice(["A", "B", "C", "D", "E"])
+ >>> return attrs
+ >>>
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> events.set_attribute(random_attributes(), "fakeattribute")
+
+ """
+ if self.cm is not None:
+ self.cm.set_attributes(attr, name)
+ else:
+ raise ValueError("No temporal clustering set")
+
+ def get_attribute(self, name: str) -> dict:
+ """
+ Get the attributes associated to the nodes
+
+ :param name: the name of the attribute
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> import random
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>>
+ >>> def random_attributes():
+ >>> attrs = {}
+ >>> for i in range(250):
+ >>> attrs[i] = {}
+ >>> for t in range(10):
+ >>> attrs[i][t] = random.choice(["A", "B", "C", "D", "E"])
+ >>> return attrs
+ >>>
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> events.set_attribute(random_attributes(), "fakeattribute")
+ >>> attrs = events.get_attribute("fakeattribute")
+ """
+ if self.cm is not None:
+ return self.cm.get_attributes(name)
+ else:
+ raise ValueError("No temporal clustering set")
+
+ def polytree(self) -> nx.DiGraph:
+ """
+ Reconstruct the poly-tree representing community lifecycles, leveraging the previously computed events.
+
+ :return: a networkx DiGraph object.
+ Nodes represent communities, their ids are assigned following the pattern {tid}_{cid},
+ where tid is the time of observation and
+ cid is the position of the community within the Clustering object.
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> g = events.polytree()
+ """
+
+ g = nx.DiGraph()
+ for e in self.events:
+ evt = self.events[e]
+ for f in evt.get_in_flow():
+ g.add_edge(f, e)
+ for t in evt.get_out_flow():
+ g.add_edge(e, t)
+
+ return g
+
+ def validate_flow(
+ self,
+ target: str,
+ direction: str,
+ min_branch_size: int = 1,
+ iterations: int = 1000,
+ ) -> dict:
+ """
+ Compare the flow with a null model. Each branch of each flow is compared with a null branch of the same size.
+ The null model is generated by randomly sampling elements from the reference partition *iterations* times.
+ The mean and standard deviation of the null model are used to compute a z-score
+ for each branch, which is then used to compute a p-value.
+
+ :param target: target set identifier
+ :param direction: temporal direction, either "+" (out flow) or "-" (in flow)
+ :param min_branch_size: minimum size of a branch to be considered
+ :param iterations: number of random draws to be used to generate the null model
+ :return: a dictionary keyed by flow branch and valued by the null model mean, std, and p-value
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> cf = events.validate_flow("0_2", "+")
+ """
+ return flow_null(self.cm, target, direction, min_branch_size, iterations)
+
+ def validate_all_flows(
+ self, direction: str, min_branch_size=1, iterations=1000
+ ) -> dict:
+ """
+ Compare all flows with null models. See validate_flow for details.
+
+ :param direction: temporal direction, either "+" (out flow) or "-" (in flow)
+ :param min_branch_size: minimum size of a branch to be considered
+ :param iterations: number of random draws to be used to generate the null model
+ :return: a dictionary keyed by set identifier and valued by mean, std, and p-value
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> vf = events.validate_all_flows("+")
+ """
+ return all_flows_null(self.cm, direction, min_branch_size, iterations)
+
+ def to_json(self) -> dict:
+ """
+ Convert the lifecycle to json
+
+ :return: the lifecycle as json
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> events.to_json()
+ """
+ res = {
+ "algorithm": self.algo,
+ "events": {k: v.to_json() for k, v in self.events.items()},
+ "event_types": list(self.event_types),
+ }
+
+ return res
diff --git a/cdlib/lifecycles/classes/matching.py b/cdlib/lifecycles/classes/matching.py
new file mode 100644
index 00000000..9d6944c7
--- /dev/null
+++ b/cdlib/lifecycles/classes/matching.py
@@ -0,0 +1,268 @@
+import json
+from collections import defaultdict
+
+__all__ = ["CommunityMatching"]
+
+
+class CommunityMatching(object):
+ """
+ A class to represent and analyze temporally-evolving groups.
+ """
+
+ def __init__(self, dtype: type = int) -> None:
+
+ self.dtype = dtype
+ self.tids = []
+ self.named_sets = defaultdict(set)
+ self.tid_to_named_sets = defaultdict(list)
+ self.attributes = defaultdict(dict)
+
+ # Convenience get methods
+ def temporal_ids(self) -> list:
+ """
+ retrieve the temporal ids of the CommunityMatching.
+ Temporal ids are integers that represent the observation time of a partition.
+ """
+ return self.tids
+
+ def slice(self, start: int, end: int) -> object:
+ """
+ slice the CommunityMatching to keep only a given interval
+
+ :param start: the start of the interval
+ :param end: the end of the interval
+ :return: a new CommunityMatching object
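+
+ :Example:
+
+ >>> # illustrative usage, assuming cm is a populated CommunityMatching
+ >>> first_five = cm.slice(0, 5)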
+ """
+ temp = CommunityMatching(self.dtype)
+ temp.tids = self.tids[start:end]
+ temp.named_sets = {
+ k: v
+ for k, v in self.named_sets.items()
+ if int(k.split("_")[0]) in temp.tids
+ }
+ temp.tid_to_named_sets = {
+ k: v for k, v in self.tid_to_named_sets.items() if int(k) in temp.tids
+ }
+ temp_attrs = {}
+ for attr_name, attr in self.attributes.items():
+ temp_attrs[attr_name] = {k: v for k, v in attr.items() if k in temp.tids}
+ temp.attributes = temp_attrs
+ return temp
+
+ def universe_set(self) -> set:
+ """
+ retrieve the universe set.
+ The universe set is the union of all sets in the CommunityMatching
+
+ :return: the universe set
+ """
+ universe = set()
+ for set_ in self.named_sets.values():
+ universe = universe.union(set_)
+ return universe
+
+ def groups_ids(self) -> list:
+ """
+ retrieve the group ids of the CommunityMatching. Each id is of the form 'tid_gid' where tid is the temporal id
+ and gid is the group id. The group id is a unique identifier of the group within the temporal id.
+
+ :return: a list of ids of the temporal groups
+ """
+ return list(self.named_sets.keys())
+
+ # Partition methods
+ def __add_partition(self, partition: list) -> None:
+ """
+ add a partition to the CommunityMatching. A partition is a list of sets observed at a given time instant. Each
+ partition will be assigned a unique id (tid) corresponding to the observation time, and each set in the
+ partition will be assigned a unique name.
+
+ :param partition: a collection of sets
+ :return: None
+ """
+
+ tid = len(self.tids)
+ self.tids.append(tid)
+
+ for i, group in enumerate(partition):
+ name = str(tid) + "_" + str(i)
+ self.tid_to_named_sets[str(tid)].append(name)
+
+ if self.dtype in [int, float, str]:
+ try:
+ self.named_sets[name] = set(group)
+ except TypeError: # group is not iterable (only 1 elem)
+ tmp = set()
+ tmp.add(group)
+ self.named_sets[name] = tmp
+ else:
+ raise NotImplementedError("dtype not supported")
+
+ def set_temporal_clustering(self, partitions: object) -> None:
+ """
+ add multiple partitions to the CommunityMatching.
+
+ :param partitions: a list of partitions
+ :return: None
+ """
+ tids = partitions.get_observation_ids()
+ for t in tids:
+ self.__add_partition(partitions.get_clustering_at(t).communities)
+
+ def get_partition_at(self, tid: int) -> list:
+ """
+ retrieve a partition by id
+
+ :param tid: the id of the partition to retrieve
+ :return: the list of names of the sets composing the partition
+ """
+ if str(tid) not in self.tid_to_named_sets:
+ return []
+ return self.tid_to_named_sets[str(tid)]
+
+ # Attribute methods
+ def set_attributes(self, attributes: dict, attr_name: str) -> None:
+ """
+ set the temporal attributes of the elements in the CommunityMatching
+
+ The temporal attributes must be provided as a dictionary keyed by the element id and valued by a dictionary
+ keyed by the temporal id and valued by the attribute value.
+
+ :param attr_name: the name of the attribute
+ :param attributes: a dictionary of temporal attributes
+ :return: None
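+
+ :Example:
+
+ >>> # illustrative shape, assuming cm is a populated CommunityMatching:
+ >>> # element id -> {temporal id: attribute value}
+ >>> attrs = {0: {0: "A", 1: "B"}, 1: {0: "B", 1: "B"}}
+ >>> cm.set_attributes(attrs, "labels")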
+ """
+ self.attributes[attr_name] = attributes
+
+ def get_attributes(self, attr_name, of=None) -> dict:
+ """
+ retrieve the temporal attributes of the CommunityMatching
+
+ :param attr_name: the name of the attribute
+ :param of: the element for which to retrieve the attributes. If None, all attributes are returned
+
+ :return: a dictionary keyed by element id and valued by a dictionary keyed by temporal id and valued
+ by the attribute value
+ """
+ if of is None:
+ return self.attributes[attr_name]
+ else:
+ return self.attributes[attr_name][of]
+
+ # Set methods
+ def get_group(self, gid: str) -> set:
+ """
+ retrieve a group by id
+
+ :param gid: the name of the group to retrieve
+ :return: the group corresponding to the given name
+ """
+ return self.named_sets[gid]
+
+ def group_iterator(self, tid: int = None) -> iter:
+ """
+ returns an iterator over the groups of the CommunityMatching.
+ if a temporal id is provided, it will iterate over the groups observed at that time instant
+
+ :param tid: the temporal id of the groups to iterate over. Default is None
+ :return: an iterator over the groups
+ """
+ if tid is None:
+ yield from self.named_sets.values()
+ else:
+ for name in self.get_partition_at(tid):
+ yield self.named_sets[name]
+
+ def filter_on_group_size(self, min_size: int = 1, max_size: int = None) -> None:
+ """
+ remove groups that do not meet the size criteria
+
+ :param min_size: the minimum size of the groups to keep
+ :param max_size: the maximum size of the groups to keep
+ :return: None
+ """
+
+ if max_size is None:
+ max_size = len(self.universe_set())
+
+ for name, set_ in self.named_sets.copy().items():
+ if len(set_) < min_size or len(set_) > max_size:
+ del self.named_sets[name]
+ self.tid_to_named_sets[name.split("_")[0]].remove(name)
+
+ # Element-centric methods
+ def get_element_membership(self, element: object) -> list:
+ """
+ retrieve the list of sets that contain a given element
+
+ :param element: the element for which to retrieve the memberships
+ :return: a list of set names that contain the given element
+ """
+
+ memberships = list()
+ for name, set_ in self.named_sets.items():
+ if element in set_:
+ memberships.append(name)
+ return memberships
+
+ def get_all_element_memberships(self) -> dict:
+ """
+ retrieve the list of sets that contain each element in the CommunityMatching
+
+ :return: a dictionary keyed by element and valued by a list of set names that contain the element
+ """
+
+ memberships = defaultdict(list)
+
+ for element in self.universe_set():
+ for name, set_ in self.named_sets.items():
+ if element in set_:
+ memberships[element].append(name)
+
+ return memberships
+
+ # Flow methods
+ def group_flow(self, target: str, direction: str, min_branch_size: int = 1) -> dict:
+ """
+ compute the flow of a group w.r.t. a given temporal direction. The flow of a group is the collection of groups
+ in the adjacent observation that share at least one element with the target group. Returns a dictionary keyed by
+ group name and valued by the intersection of the target group and the group corresponding to the key.
+
+ :param target: the name of the group to analyze
+ :param direction: the temporal direction in which the group is to be analyzed
+ :param min_branch_size: the minimum size of the intersection between the target group and the group
+ :return: a dictionary keyed by group name and valued by the intersection of the target group and the group
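+
+ :Example:
+
+ >>> # illustrative usage, assuming cm is a populated CommunityMatching
+ >>> out_branches = cm.group_flow("1_0", "+") # flow towards observation 2
+ >>> in_branches = cm.group_flow("1_0", "-", min_branch_size=2) # flow from observation 0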
+ """
+ flow = dict()
+ tid = int(target.split("_")[0])
+ if direction == "+":
+ ref_tid = tid + 1
+ elif direction == "-":
+ ref_tid = tid - 1
+ else:
+ raise ValueError("direction must either be + or -")
+ reference = self.get_partition_at(ref_tid)
+ target_set = self.get_group(target)
+
+ for name in reference:
+ set_ = self.get_group(name)
+ branch = target_set.intersection(set_)
+ if len(branch) >= min_branch_size:
+ flow[name] = branch
+ return flow
+
+ def all_flows(self, direction: str, min_branch_size: int = 1) -> dict:
+ """
+ compute the flow of all groups w.r.t. a given temporal direction
+
+ :param direction: the temporal direction in which the sets are to be analyzed
+ :param min_branch_size: the minimum size of a branch to be considered
+ :return: a dictionary keyed by group name and valued by the flow of the group
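+
+ :Example:
+
+ >>> # illustrative usage, assuming cm is a populated CommunityMatching
+ >>> forward_flows = cm.all_flows("+", min_branch_size=1)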
+ """
+ all_flows = dict()
+ for name in self.named_sets:
+ all_flows[name] = self.group_flow(
+ name, direction, min_branch_size=min_branch_size
+ )
+
+ return all_flows
diff --git a/cdlib/lifecycles/utils/__init__.py b/cdlib/lifecycles/utils/__init__.py
new file mode 100644
index 00000000..16281fe0
--- /dev/null
+++ b/cdlib/lifecycles/utils/__init__.py
@@ -0,0 +1 @@
+from .utils import *
diff --git a/cdlib/lifecycles/utils/utils.py b/cdlib/lifecycles/utils/utils.py
new file mode 100644
index 00000000..cbb751c9
--- /dev/null
+++ b/cdlib/lifecycles/utils/utils.py
@@ -0,0 +1,81 @@
+__all__ = [
+ "backward_event_names",
+ "forward_event_names",
+ "colormap",
+ "get_group_attribute_values",
+]
+
+
+def backward_event_names() -> list:
+ """
+ return the list of backward event names
+ """
+ return [
+ "Birth",
+ "Accumulation",
+ "Growth",
+ "Expansion",
+ "Continuation",
+ "Merge",
+ "Offspring",
+ "Reorganization",
+ ]
+
+
+def forward_event_names() -> list:
+ """
+ return the list of forward event names
+ """
+ return [
+ "Death",
+ "Dispersion",
+ "Shrink",
+ "Reduction",
+ "Continuation",
+ "Split",
+ "Ancestor",
+ "Disassemble",
+ ]
+
+
+def colormap() -> dict:
+ """
+ return a dictionary of colors for each event type.
+ this is used to color the events in the visualization
+ """
+
+ return {
+ "Birth": " #808000",
+ "Accumulation": "#4CC89F",
+ "Growth": "#929292",
+ "Expansion": "#5C5C5C",
+ "Continuation": "#CFBAE1",
+ "Merge": "#E34856",
+ "Offspring": "#0DAAE9",
+ "Reorganization": "#FFA500",
+ "Death": " #808000",
+ "Dispersion": "#4CC89F",
+ "Shrink": "#929292",
+ "Reduction": "#5C5C5C",
+ "Split": "#E34856",
+ "Ancestor": "#0DAAE9",
+ "Disassemble": "#FFA500",
+ }
+
+
+def get_group_attribute_values(lc: object, target: str, attr_name: str) -> list:
+ """
+ retrieve the list of attributes of the elements in a set
+
+ :param lc: a CommunityMatching object
+ :param target: the id of the set
+ :param attr_name: the name of the attribute
+ :return: a list of attributes corresponding to the elements in the set
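+
+ :Example:
+
+ >>> # illustrative usage, assuming events is a LifeCycle with attribute "fakeattribute" set
+ >>> values = get_group_attribute_values(events.cm, "0_2", "fakeattribute")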
+ """
+
+ tid = int(target.split("_")[0])
+ attributes = list()
+
+ for elem in lc.get_group(target):
+ attributes.append(lc.get_attributes(attr_name, of=elem)[tid])
+ return attributes
diff --git a/cdlib/readwrite/io.py b/cdlib/readwrite/io.py
index f6531adf..72bcbd7a 100644
--- a/cdlib/readwrite/io.py
+++ b/cdlib/readwrite/io.py
@@ -1,4 +1,10 @@
-from cdlib import NodeClustering, FuzzyNodeClustering, EdgeClustering
+from cdlib import (
+ NodeClustering,
+ FuzzyNodeClustering,
+ EdgeClustering,
+ LifeCycle,
+ CommunityEvent,
+)
import json
import gzip
@@ -8,6 +14,8 @@
"write_community_json",
"read_community_json",
"read_community_from_json_string",
+ "write_lifecycle_json",
+ "read_lifecycle_json",
]
@@ -187,9 +195,6 @@ def read_community_from_json_string(json_repr: str) -> object:
>>> g = nx.karate_club_graph()
>>> coms = algorithms.louvain(g)
>>> readwrite.write_community_json(coms, "communities.json")
- >>> with open("community.json") as f:
- >>> cr = f.read()
- >>> readwrite.write_community_from_json_string(cr)
"""
coms = json.loads(json_repr)
@@ -218,3 +223,108 @@ def read_community_from_json_string(json_repr: str) -> object:
nc.__class__ = EdgeClustering
return nc
+
+
+def write_lifecycle_json(lifecycle: LifeCycle, path: str, compress: bool = False):
+ """
+ Save lifecycle structure to JSON file.
+
+ :param lifecycle: a LifeCycle object
+ :param path: output filename
+ :param compress: whether to compress the JSON, default False
+ :return: None
+
+ :Example:
+
+ >>> from cdlib import LifeCycle, TemporalClustering
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> from cdlib.readwrite import write_lifecycle_json, read_lifecycle_json
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>>
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> write_lifecycle_json(events, "lifecycle.json")
+ """
+
+ repr_ = lifecycle.to_json()
+ js_dmp = json.dumps(repr_)
+
+ if compress:
+ op = gzip.open
+ else:
+ op = open
+
+ with op(path, "wt") as f:
+ f.write(js_dmp)
+
+
+def read_lifecycle_json(path: str, compress: bool = False) -> object:
+ """
+ Read lifecycle from JSON file.
+
+ :param path: input filename
+ :param compress: whether the file is compressed, default False
+ :return: a LifeCycle object
+
+ :Example:
+
+ >>> from cdlib import LifeCycle, TemporalClustering
+ >>> from cdlib import algorithms
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> from cdlib.readwrite import write_lifecycle_json, read_lifecycle_json
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>>
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> write_lifecycle_json(events, "lifecycle.json")
+ >>> events = read_lifecycle_json("lifecycle.json")
+
+ """
+
+ if compress:
+ op = gzip.open
+ else:
+ op = open
+
+ with op(path, "rt") as f:
+ repr_ = json.load(f)
+
+ lc = LifeCycle()
+
+ lc.event_types = repr_["event_types"]
+ lc.algo = repr_["algorithm"]
+
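+ # note: in/out flows are restored as plain lists, since sets are not JSON-serializable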
+ for e in repr_["events"]:
+ evt = CommunityEvent(e)
+ evt.from_event = repr_["events"][e]["from_event"]
+ evt.to_event = repr_["events"][e]["to_event"]
+ evt.in_flow = repr_["events"][e]["in_flow"]
+ evt.out_flow = repr_["events"][e]["out_flow"]
+ lc.events[e] = evt
+
+ return lc
diff --git a/cdlib/test/test_events.py b/cdlib/test/test_events.py
new file mode 100644
index 00000000..3676bd6f
--- /dev/null
+++ b/cdlib/test/test_events.py
@@ -0,0 +1,269 @@
+import unittest
+import cdlib
+from cdlib import algorithms
+from cdlib import LifeCycle
+from cdlib import TemporalClustering
+from cdlib.lifecycles.algorithms.event_analysis import (
+ facets,
+ event_weights,
+ event as evn,
+)
+from plotly import graph_objects as go
+import networkx as nx
+from networkx.generators.community import LFR_benchmark_graph
+import matplotlib.pyplot as plt
+import dynetx as dn
+import os
+from cdlib.viz import (
+ plot_flow,
+ plot_event_radar,
+ plot_event_radars,
+ typicality_distribution,
+)
+
+
+class EventTest(unittest.TestCase):
+ def test_creation(self):
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets")
+
+ c = events.analyze_flows("+")
+ self.assertIsInstance(c, dict)
+ c = events.analyze_flow("0_2", "+")
+ self.assertIsInstance(c, dict)
+
+ events = LifeCycle(tc)
+ events.compute_events("asur")
+
+ c = events.analyze_flows("+")
+ self.assertIsInstance(c, dict)
+ c = events.analyze_flow("0_2", "+")
+ self.assertIsInstance(c, dict)
+
+ events = LifeCycle(tc)
+ events.compute_events("greene")
+
+ c = events.analyze_flows("+")
+ self.assertIsInstance(c, dict)
+
+ c = events.analyze_flow("0_2", "+")
+ self.assertIsInstance(c, dict)
+
+ def test_custom_matching(self):
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ jaccard = lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y))
+ events.compute_events_with_custom_matching(jaccard, two_sided=True)
+ c = events.analyze_flows("+")
+ self.assertIsInstance(c, dict)
+
+ events.compute_events_with_custom_matching(
+ jaccard, two_sided=False, threshold=0
+ )
+ c = events.analyze_flows("+")
+ self.assertIsInstance(c, dict)
+
+ def test_polytree(self):
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets")
+ g = events.polytree()
+ self.assertIsInstance(g, nx.DiGraph)
+
+ def test_null_model(self):
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets")
+ cf = events.validate_flow("0_2", "+")
+ self.assertIsInstance(cf, dict)
+
+ vf = events.validate_all_flows("+")
+ self.assertIsInstance(vf, dict)
+
+ def test_viz(self):
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets")
+
+ fig = plot_flow(events)
+ self.assertIsInstance(fig, go.Figure)
+
+ plot_event_radar(events, "0_2", direction="+")
+ plt.savefig("radar.pdf")
+ os.remove("radar.pdf")
+
+ plot_event_radars(events, "0_2")
+ plt.savefig("radars.pdf")
+ os.remove("radars.pdf")
+
+ typicality_distribution(events, "+")
+ plt.savefig("td.pdf")
+ os.remove("td.pdf")
+
+ def test_explicit(self):
+
+ dg = dn.DynGraph()
+ for x in range(10):
+ g = nx.erdos_renyi_graph(200, 0.05)
+ dg.add_interactions_from(list(g.edges()), t=x)
+ coms = algorithms.tiles(dg, 2)
+
+ events = LifeCycle(coms)
+ events.compute_events_from_explicit_matching()
+
+ c = events.analyze_flows("+")
+ self.assertIsInstance(c, dict)
+
+ def test_node_attributes(self):
+ import random
+
+ def random_attributes():
+ attrs = {}
+ for i in range(250):
+ attrs[i] = {}
+ for t in range(10):
+ attrs[i][t] = random.choice(["A", "B", "C", "D", "E"])
+ return attrs
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets")
+ events.set_attribute(random_attributes(), "fakeattribute")
+ attrs = events.get_attribute("fakeattribute")
+ self.assertIsInstance(attrs, dict)
+
+ events.analyze_flow("1_1", "+", attr="fakeattribute")
+ self.assertIsInstance(attrs, dict)
+
+ ev = events.get_event("1_1")
+ a = ev.out_flow # the out flow of community 1_1
+ self.assertIsInstance(a, dict)
+ a = ev.in_flow # the in flow of community 1_1
+ self.assertIsInstance(a, dict)
+ a = ev.from_event # the backward events of community 1_1
+ self.assertIsInstance(a, dict)
+ a = ev.to_event # the forward events of community 1_1
+ self.assertIsInstance(a, dict)
+
+ def test_marginal(self):
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets")
+
+ # marginal tests: the following methods are not directly used by cdlib,
+ # but are invoked here for completeness
+ self.assertIsInstance(
+ events.cm.slice(0, 5), cdlib.lifecycles.classes.matching.CommunityMatching
+ )
+ self.assertIsInstance(events.cm.universe_set(), set)
+ self.assertIsInstance(list(events.cm.group_iterator()), list)
+ self.assertIsInstance(list(events.cm.group_iterator(3)), list)
+ events.cm.filter_on_group_size(1, 100)
+ events.cm.get_element_membership(1)
+ events.cm.get_all_element_memberships()
+ events.get_events()
+ events.get_event_types()
+ ev = events.get_event("1_1")
+ ev.get_from_event()
+ ev.get_to_event()
+ facets(events.cm, "0_2", "+")
+ event_weights(events.cm, "0_2", "+")
+ evn(events.cm, "0_2", "+")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/cdlib/test/test_io.py b/cdlib/test/test_io.py
index dc5ff44f..5cbfcd9b 100644
--- a/cdlib/test/test_io.py
+++ b/cdlib/test/test_io.py
@@ -1,6 +1,10 @@
import unittest
-from cdlib import algorithms
+
from cdlib import readwrite
+from cdlib import LifeCycle, TemporalClustering
+from cdlib import algorithms
+from networkx.generators.community import LFR_benchmark_graph
+from cdlib.readwrite import write_lifecycle_json, read_lifecycle_json
import networkx as nx
import os
@@ -51,3 +55,26 @@ def test_read_write_json(self):
cr = f.read()
readwrite.read_community_from_json_string(cr)
os.remove("coms.json")
+
+ def test_events_read_write(self):
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g)
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets")
+ write_lifecycle_json(events, "lifecycle.json")
+ e = read_lifecycle_json("lifecycle.json")
+ self.assertIsInstance(e, LifeCycle)
+ os.remove("lifecycle.json")
diff --git a/cdlib/test/test_nodeclustering.py b/cdlib/test/test_nodeclustering.py
index ab375a9a..f6010d67 100644
--- a/cdlib/test/test_nodeclustering.py
+++ b/cdlib/test/test_nodeclustering.py
@@ -77,3 +77,20 @@ def test_comparison(self):
self.assertIsInstance(coms.adjusted_mutual_information(coms2).score, float)
self.assertIsInstance(coms.adjusted_rand_index(coms2).score, float)
self.assertIsInstance(coms.variation_of_information(coms2).score, float)
+
+ self.assertIsInstance(coms.partition_closeness_simple(coms2).score, float)
+ self.assertIsInstance(coms.ecs(coms2).score, float)
+ self.assertIsInstance(coms.jaccard_index(coms2).score, float)
+ self.assertIsInstance(coms.rand_index(coms2).score, float)
+ self.assertIsInstance(coms.fowlkes_mallows_index(coms2).score, float)
+ self.assertIsInstance(coms.classification_error(coms2).score, float)
+ self.assertIsInstance(coms.czekanowski_index(coms2).score, float)
+ self.assertIsInstance(coms.dice_index(coms2).score, float)
+ self.assertIsInstance(coms.sorensen_index(coms2).score, float)
+ self.assertIsInstance(coms.rogers_tanimoto_index(coms2).score, float)
+ self.assertIsInstance(coms.southwood_index(coms2).score, float)
+ self.assertIsInstance(coms.mi(coms2).score, float)
+ self.assertIsInstance(coms.rmi(coms2).score, float)
+ self.assertIsInstance(coms.geometric_accuracy(coms2).score, float)
+ self.assertIsInstance(coms.overlap_quality(coms2).score, float)
+ self.assertIsInstance(coms.sample_expected_sim(coms2).score, float)
diff --git a/cdlib/test/test_partitions_comparisons.py b/cdlib/test/test_partitions_comparisons.py
index d2ca335a..4fff4b01 100644
--- a/cdlib/test/test_partitions_comparisons.py
+++ b/cdlib/test/test_partitions_comparisons.py
@@ -126,108 +126,76 @@ def test_clusim(self):
louvain_communities = louvain(g)
lp_communities = label_propagation(g)
- score = evaluation.ecs(
- louvain_communities, lp_communities
- )
+ score = evaluation.ecs(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.jaccard_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.jaccard_index(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.rand_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.rand_index(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.fowlkes_mallows_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.fowlkes_mallows_index(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.classification_error(
- louvain_communities, lp_communities
- )
+ score = evaluation.classification_error(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.czekanowski_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.czekanowski_index(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.dice_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.dice_index(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.sorensen_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.sorensen_index(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.rogers_tanimoto_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.rogers_tanimoto_index(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.southwood_index(
- louvain_communities, lp_communities
- )
+ score = evaluation.southwood_index(louvain_communities, lp_communities)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.mi(
- louvain_communities, lp_communities
- )
+ score = evaluation.mi(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.rmi(
- louvain_communities, lp_communities
- )
+ score = evaluation.rmi(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.geometric_accuracy(
- louvain_communities, lp_communities
- )
+ score = evaluation.geometric_accuracy(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.overlap_quality(
- louvain_communities, lp_communities
- )
+ score = evaluation.overlap_quality(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
- score = evaluation.sample_expected_sim(
- louvain_communities, lp_communities
- )
+ score = evaluation.sample_expected_sim(louvain_communities, lp_communities)
self.assertLessEqual(score.score, 1)
self.assertGreaterEqual(score.score, 0)
-
-
diff --git a/cdlib/test/test_temporal_clustering.py b/cdlib/test/test_temporal_clustering.py
index ae7f7f10..7777751b 100644
--- a/cdlib/test/test_temporal_clustering.py
+++ b/cdlib/test/test_temporal_clustering.py
@@ -40,37 +40,11 @@ def test_stability(self):
trend = tc.clustering_stability_trend(evaluation.normalized_mutual_information)
self.assertEqual(len(trend), len(tc.get_observation_ids()) - 1)
- def test_matching(self):
- tc = get_temporal_network_clustering()
- matches = tc.community_matching(
- lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y)), False
- )
- self.assertIsInstance(matches, list)
- self.assertIsInstance(matches[0], tuple)
- self.assertEqual(len(matches[0]), 3)
-
- matches = tc.community_matching(
- lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y)), True
- )
- self.assertIsInstance(matches, list)
- self.assertIsInstance(matches[0], tuple)
- self.assertEqual(len(matches[0]), 3)
-
- def test_lifecycle(self):
- tc = get_temporal_network_clustering()
- pt = tc.lifecycle_polytree(
- lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y)), True
- )
- self.assertIsInstance(pt, nx.DiGraph)
-
def test_community_access(self):
tc = get_temporal_network_clustering()
- pt = tc.lifecycle_polytree(
- lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y)), True
- )
- for cid in pt.nodes():
- com = tc.get_community(cid)
- self.assertIsInstance(com, list)
+
+ com = tc.get_community("0_0")
+ self.assertIsInstance(com, list)
def test_to_json(self):
tc = get_temporal_network_clustering()
@@ -78,12 +52,3 @@ def test_to_json(self):
self.assertIsInstance(js, str)
res = json.loads(js)
self.assertIsNone(res["matchings"])
-
- tc = get_temporal_network_clustering()
- tc.lifecycle_polytree(
- lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y)), True
- )
- js = tc.to_json()
- self.assertIsInstance(js, str)
- res = json.loads(js)
- self.assertIsNotNone(res["matchings"])
diff --git a/cdlib/test/test_viz_network.py b/cdlib/test/test_viz_network.py
index f2d05375..708ffe27 100644
--- a/cdlib/test/test_viz_network.py
+++ b/cdlib/test/test_viz_network.py
@@ -56,16 +56,16 @@ def test_community_graph(self):
os.remove("cg.pdf")
def test_highlighted_clusters(self):
-
+
g = nx.karate_club_graph()
coms = algorithms.louvain(g)
viz.plot_network_highlighted_clusters(g, coms)
-
+
plt.savefig("highlighted_clusters.pdf")
os.remove("highlighted_clusters.pdf")
-
+
coms = algorithms.demon(g, 0.25)
viz.plot_network_highlighted_clusters(g, coms)
-
+
plt.savefig("highlighted_clusters.pdf")
- os.remove("highlighted_clusters.pdf")
\ No newline at end of file
+ os.remove("highlighted_clusters.pdf")
diff --git a/cdlib/utils.py b/cdlib/utils.py
index 9ca5a3a8..ae9b74a9 100644
--- a/cdlib/utils.py
+++ b/cdlib/utils.py
@@ -225,7 +225,7 @@ def nx_node_integer_mapping(graph: object) -> tuple:
def remap_node_communities(communities: object, node_map: dict) -> list:
- """Apply a map to the obtained communities to retreive the original node labels
+ """Apply a map to the obtained communities to retrive the original node labels
:param communities: NodeClustering object
:param node_map: dictionary
diff --git a/cdlib/viz/__init__.py b/cdlib/viz/__init__.py
index b25e7426..a781342c 100644
--- a/cdlib/viz/__init__.py
+++ b/cdlib/viz/__init__.py
@@ -1,2 +1,3 @@
from .networks import *
from .plots import *
+from .community_events import *
diff --git a/cdlib/viz/community_events.py b/cdlib/viz/community_events.py
new file mode 100644
index 00000000..c3bd0e51
--- /dev/null
+++ b/cdlib/viz/community_events.py
@@ -0,0 +1,503 @@
+import matplotlib.pyplot as plt
+from cdlib import LifeCycle
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+
+from cdlib.lifecycles import utils
+from cdlib.lifecycles.algorithms.event_analysis import (
+ analyze_flow,
+ event_weights_from_flow,
+ events_all,
+)
+from cdlib.lifecycles.algorithms.measures import event_typicality
+
+__all__ = [
+ "plot_flow",
+ "plot_event_radar",
+ "plot_event_radars",
+ "typicality_distribution",
+]
+
+
+def _values_to_idx(links):
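+ # plotly's Sankey requires integer node ids: map each "tid_cid" label to its
+ # index in the sorted list of all labels appearing as source or target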
+ df = links[["source", "target"]].copy()
+ all_labels = sorted(list(set(links["source"].tolist() + links["target"].tolist())))
+
+ df["source_ID"] = df["source"].apply(lambda x: all_labels.index(x))
+ df["target_ID"] = df["target"].apply(lambda x: all_labels.index(x))
+ df["value"] = links["value"]
+ return df
+
+
+def _color_links(links, color):
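+ # self-loop links are made fully transparent and filler links (sources containing "X")
+ # almost transparent; all remaining links take the requested color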
+ res = []
+ for _, row in links.iterrows():
+ if row["source"] == row["target"]:
+ res.append("rgba(0,0,0,0.0)")
+ elif "X" in row["source"]:
+ res.append("rgba(0,0,0,0.02)")
+ else:
+ res.append(color)
+ return res
+
+
+def _make_sankey(links, color, title, width=500, height=500, colors=None):
+ """ """
+ links["color"] = _color_links(links, color=color)
+ all_labels = sorted(list(set(links["source"].tolist() + links["target"].tolist())))
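+ # position nodes horizontally according to their temporal id, rescaled to [0.1, 0.9]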
+ all_x = [int(l.split("_")[0]) for l in all_labels]
+ all_x = [(x - min(all_x)) / max(all_x) for x in all_x]
+ all_x = [x * 0.8 + 0.1 for x in all_x]
+ all_y = [0.5] * len(all_x)
+
+ node_colors = []
+ if isinstance(colors, list):
+ for l in all_labels:
+ if l in colors:
+ node_colors.append("green")
+ else:
+ node_colors.append("lightgrey")
+
+ fig = go.Figure(
+ data=[
+ go.Sankey(
+ arrangement="snap",
+ node=dict(
+ pad=10,
+ thickness=15,
+ line=dict(color="darkgray", width=0.2),
+ label=all_labels,
+ x=all_x,
+ y=all_y,
+ color=node_colors,
+ hovertemplate="Group size: %{value}",
+ ),
+ link=dict(
+ source=list(
+ (links["source_ID"])
+ ), # indices correspond to labels, e.g. A1, A2, A1, B1, ...
+ target=list((links["target_ID"])),
+ value=list((links["value"])),
+ color=list((links["color"])),
+ label=list((links["value"])),
+ ),
+ )
+ ]
+ )
+
+ fig.update_layout(
+ font_size=10,
+ width=width,
+ height=height,
+ title={"text": title, "font": {"size": 25}}, # Set the font size here
+ )
+ return fig
+
+
+def _make_radar(values, categories, rescale, title="", color="green", ax=None):
+    pi = np.pi  # use numpy's constant rather than a truncated literal
+ # number of variables
+ N = len(categories)
+
+    # Angle of each axis in the plot (the circle is divided by the number of variables)
+ angles = [n / float(N) * 2 * pi for n in range(N)]
+ angles.append(angles[0]) # to close the line
+ values = values.copy()
+ values.append(values[0]) # to close the line
+
+ # Initialise the spider plot
+ if ax is None:
+ ax = plt.subplot(
+ 111,
+ polar=True,
+ )
+
+ # If you want the first axis to be on top:
+ ax.set_theta_offset(pi / 2)
+ ax.set_theta_direction(-1)
+
+    # Draw one axis per variable and add its label
+ ax.set_xticks(angles[:-1], categories, color="blue", size=10)
+ # Draw ylabels
+ ax.set_rlabel_position(10)
+ ticks = list(np.linspace(0, 1, 5))
+
+ ax.set_rticks(ticks, [str(v) for v in ticks], color="grey", size=9)
+ ax.grid(True)
+
+ plt.gcf().canvas.draw()
+
+ angles_labels = np.rad2deg(angles)
+ angles_labels = [360 - a for a in angles_labels]
+ angles_labels = [180 + a if 90 < a < 270 else a for a in angles_labels]
+ labels = []
+ for label, angle in zip(ax.get_xticklabels(), angles_labels):
+ x, y = label.get_position()
+ lab = ax.text(
+ x,
+ y + 0.05,
+ label.get_text(),
+ transform=label.get_transform(),
+ ha=label.get_ha(),
+ va=label.get_va(),
+ color="grey",
+ size=11,
+ fontdict={"variant": "small-caps"},
+ )
+ lab.set_rotation(angle)
+ labels.append(lab)
+ ax.set_xticklabels([])
+
+ ax.plot(angles, values, color=color, linewidth=1.5, linestyle="solid")
+
+ ax.fill(angles, values, color="red", alpha=0.0)
+ if rescale:
+ ax.set_rmax(max(values) + 0.1)
+ else:
+ ax.set_rmax(1)
+ ax.set_rmin(0)
+ if title != "":
+ ax.set_title(title + "\n\n")
+ return ax
+
+
+def plot_flow(lc: LifeCycle, node_focus: str = None, slice: tuple = None) -> go.Figure:
+ """
+ Plot the flow of a lifecycle
+
+ :param lc: the lifecycle object
+ :param node_focus: plot only the flows involving this group. Defaults to None.
+ :param slice: plot only a slice of the lifecycle. Defaults to all.
+ :return: a plotly figure
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+ >>> from cdlib.viz import plot_flow
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> fig = plot_flow(events)
+ >>> fig.show()
+ """
+ if lc.cm is not None:
+ lc = lc.cm
+ else:
+ raise ValueError("No temporal clustering set")
+
+ if slice is not None:
+ lc = lc.slice(*slice)
+ links = []
+ all_flows = lc.all_flows("+")
+ sum_out = 0
+ group_size = {}
+ for name, flow in all_flows.items():
+ nodes_group1 = lc.get_group(name)
+ group_size[name] = len(nodes_group1)
+ for name2, common in flow.items():
+ if node_focus is not None:
+ nodes_group2 = lc.get_group(name2)
+ if node_focus not in nodes_group2 | nodes_group1:
+ continue
+ link = (name, name2, len(common))
+ links.append(link)
+ sum_out += len(common)
+
+ links_df = pd.DataFrame(links, columns=["source", "target", "value"])
+ current_size_source = (
+ links_df[["source", "value"]].groupby("source").sum().reset_index()
+ )
+ current_size_target = (
+ links_df[["target", "value"]].groupby("target").sum().reset_index()
+ )
+ # join the two pd on group
+ current_size = current_size_source.merge(
+ current_size_target,
+ left_on="source",
+ right_on="target",
+ suffixes=("_source", "_target"),
+ how="outer",
+ )
+ # add column taking the non-null among source and target
+ current_size["sourceTarget"] = current_size["source"].fillna(current_size["target"])
+ current_size.fillna(0, inplace=True)
+ # add a column with the max of source and target
+ current_size["max"] = current_size[["value_source", "value_target"]].max(axis=1)
+ current_size.set_index("sourceTarget", inplace=True)
+ max_input_output = current_size.to_dict()["max"]
+
+ # check the case of groups without a single link
+ for name in lc.groups_ids():
+ if name not in max_input_output:
+ max_input_output[name] = 0
+
+ for name, size in max_input_output.items():
+        if size < group_size[name]:
+ fake_size = group_size[name] - size
+ links.append((name, name, fake_size))
+ links_df = pd.DataFrame(links, columns=["source", "target", "value"])
+
+ links_df = _values_to_idx(links_df)
+
+ groups_containing_node = None
+ if node_focus is not None:
+ groups_containing_node = [
+ name for name in all_flows.keys() if node_focus in lc.get_group(name)
+ ]
+
+ return _make_sankey(
+ links_df,
+ color="lightblue",
+ title="Flow",
+ width=800,
+ height=800,
+ colors=groups_containing_node,
+ )
+
+
+def plot_event_radar(
+ lc: LifeCycle,
+ set_name: str,
+ direction: str,
+ min_branch_size: int = 1,
+ rescale: bool = True,
+ color: str = "green",
+ ax: object = None,
+):
+ """
+ Plot the radar of event weights for a given event set.
+
+ :param lc: the lifecycle object
+ :param set_name: the event set name, e.g. "0_2"
+ :param direction: the direction of the event set, either "+" or "-"
+ :param min_branch_size: the minimum size of a branch to be considered, defaults to 1
+ :param rescale: rescale the radar to the maximum value, defaults to True
+ :param color: the color of the radar, defaults to "green"
+ :param ax: the matplotlib axis, defaults to None
+ :return: the matplotlib axis
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+    >>> import matplotlib.pyplot as plt
+    >>> from cdlib.viz import plot_event_radar
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+    >>> plot_event_radar(events, "0_2", "+")
+    >>> plt.show()
+
+ """
+ if lc.cm is not None:
+ lc = lc.cm
+ else:
+ raise ValueError("No temporal clustering set")
+
+ data = analyze_flow(
+ lc, set_name, direction=direction, min_branch_size=min_branch_size
+ )
+ a = {set_name: data}
+ weights = event_weights_from_flow(a, direction=direction)
+ return _make_radar(
+ list(weights[set_name].values()),
+ list(weights[set_name].keys()),
+ rescale=rescale,
+ color=color,
+ ax=ax,
+ )
+
+
+def plot_event_radars(
+ lc: LifeCycle, set_name: str, min_branch_size: int = 1, colors: object = None
+):
+ """
+ Plot the radar of event weights for a given event set in both directions.
+
+ :param lc: the lifecycle object
+ :param set_name: the event set name, e.g. "0_2"
+ :param min_branch_size: the minimum size of a branch to be considered, defaults to 1
+ :param colors: the colors of the radar, defaults to None
+ :return: None
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+    >>> from cdlib.viz import plot_event_radars
+ >>> import matplotlib.pyplot as plt
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> plot_event_radars(events, "0_2")
+ >>> plt.show()
+
+
+ """
+
+ if colors is None:
+ colors = ["green", "red"]
+ plot_event_radar(
+ lc,
+ set_name,
+ direction="-",
+ min_branch_size=min_branch_size,
+ color=colors[0],
+ ax=plt.subplot(121, polar=True),
+ )
+ plot_event_radar(
+ lc,
+ set_name,
+ direction="+",
+ min_branch_size=min_branch_size,
+ color=colors[1],
+ ax=plt.subplot(122, polar=True),
+ )
+ plt.tight_layout()
+
+
+def typicality_distribution(
+ lc: LifeCycle,
+ direction: str,
+ width: int = 800,
+ height: int = 500,
+ showlegend: bool = True,
+):
+ """
+ Plot the distribution of typicality of events in a given direction.
+
+ :param lc: the lifecycle object
+ :param direction: the direction of the events, either "+" or "-"
+ :param width: the width of the figure, defaults to 800
+ :param height: the height of the figure, defaults to 500
+ :param showlegend: show the legend, defaults to True
+    :return: a plotly figure
+
+ :Example:
+
+ >>> from cdlib import TemporalClustering, LifeCycle
+ >>> from cdlib import algorithms
+    >>> from cdlib.viz import typicality_distribution
+ >>> from networkx.generators.community import LFR_benchmark_graph
+ >>> tc = TemporalClustering()
+ >>> for t in range(0, 10):
+ >>> g = LFR_benchmark_graph(
+ >>> n=250,
+ >>> tau1=3,
+ >>> tau2=1.5,
+ >>> mu=0.1,
+ >>> average_degree=5,
+ >>> min_community=20,
+ >>> seed=10,
+ >>> )
+ >>> coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ >>> tc.add_clustering(coms, t)
+ >>> events = LifeCycle(tc)
+ >>> events.compute_events("facets")
+ >>> fig = typicality_distribution(events, "+")
+ >>> fig.show()
+
+ """
+ if lc.cm is not None:
+ lc = lc.cm
+ else:
+ raise ValueError("No temporal clustering set")
+
+ events = events_all(lc)
+    all_typicalities = []
+    for _, event in events[direction].items():
+        all_typicalities.append(event_typicality(event))
+    df = pd.DataFrame(all_typicalities, columns=["event", "event_typicality"])
+ # round to 1 decimal so that it works for the histogram
+ df["event_typicality"] = df["event_typicality"].apply(lambda x: round(x, 1))
+ # replace 1 by 0.99 so that it is included in the last bin
+ df["event_typicality"] = df["event_typicality"].apply(
+ lambda x: 0.99 if x == 1 else x
+ )
+
+ fig = go.Figure()
+ for event in df["event"].unique():
+ fig.add_trace(
+ go.Histogram(
+ x=df[df["event"] == event]["event_typicality"],
+ name=event,
+ opacity=0.75,
+ xbins=dict(start=0, end=1.1, size=0.1),
+ )
+ )
+
+ possible_values = (
+ utils.forward_event_names()
+ if direction == "+"
+ else utils.backward_event_names()
+ )
+
+ categories_present = df["event"].unique()
+ for category in possible_values:
+ if category not in categories_present:
+ fig.add_trace(
+ go.Histogram(
+ x=[None],
+ name=category,
+ opacity=0.75,
+ xbins=dict(start=0, end=1.1, size=0.1),
+ showlegend=True,
+ )
+ ) # Empty histogram trace
+ for trace in fig.data:
+ trace.marker.color = utils.colormap()[trace.name]
+
+ fig.update_layout(showlegend=showlegend)
+ fig.update_layout(barmode="stack")
+
+ fig.update_xaxes(range=[0, 1.01], tickvals=np.arange(0, 1.01, 0.1))
+ # set figure size
+ fig.update_layout(width=width, height=height, template="simple_white")
+
+ return fig
diff --git a/cdlib/viz/networks.py b/cdlib/viz/networks.py
index 41bf60e9..1f9ccbbf 100644
--- a/cdlib/viz/networks.py
+++ b/cdlib/viz/networks.py
@@ -9,7 +9,11 @@
from community import community_louvain
from typing import Union
-__all__ = ["plot_network_clusters", "plot_network_highlighted_clusters", "plot_community_graph"]
+__all__ = [
+ "plot_network_clusters",
+ "plot_network_highlighted_clusters",
+ "plot_community_graph",
+]
# [r, b, g, c, m, y, k, 0.8, 0.2, 0.6, 0.4, 0.7, 0.3, 0.9, 0.1, 0.5]
COLOR = (
@@ -234,7 +238,7 @@ def plot_network_highlighted_clusters(
partition: NodeClustering,
position: dict = None,
figsize: tuple = (8, 8),
- node_size: int = 200, # 200 default value
+ node_size: int = 200, # 200 default value
plot_overlaps: bool = False,
plot_labels: bool = False,
cmap: object = None,
@@ -273,19 +277,19 @@ def plot_network_highlighted_clusters(
partition = __filter(partition.communities, top_k, min_size)
graph = convert_graph_formats(graph, nx.Graph)
-
+
# Assign weight of edge_weights_intracluster (default value is 200) or 1 to intra-community edges
for community in partition:
intra_community_edges = [(u, v) for u, v in graph.edges(community)]
for edge in intra_community_edges:
if all(node in community for node in edge):
- graph[edge[0]][edge[1]]['weight'] = edge_weights_intracluster
+ graph[edge[0]][edge[1]]["weight"] = edge_weights_intracluster
else:
- graph[edge[0]][edge[1]]['weight'] = 1
-
+ graph[edge[0]][edge[1]]["weight"] = 1
+
# Update node positions based on edge weights
- position = nx.spring_layout(graph, weight='weight', pos=position)
-
+ position = nx.spring_layout(graph, weight="weight", pos=position)
+
n_communities = len(partition)
if n_communities == 0:
warnings.warn("There are no communities that match the filter criteria.")
@@ -316,13 +320,23 @@ def plot_network_highlighted_clusters(
)
if isinstance(node_size, int):
fig = nx.draw_networkx_nodes(
- graph, position, node_size=node_size, node_color="w", nodelist=filtered_nodelist
+ graph,
+ position,
+ node_size=node_size,
+ node_color="w",
+ nodelist=filtered_nodelist,
)
fig.set_edgecolor("k")
-
- filtered_edge_widths = [1] * len(filtered_edgelist)
-
- nx.draw_networkx_edges(graph, position, alpha=0.25, edgelist=filtered_edgelist, width=filtered_edge_widths)
+
+ filtered_edge_widths = [1] * len(filtered_edgelist)
+
+ nx.draw_networkx_edges(
+ graph,
+ position,
+ alpha=0.25,
+ edgelist=filtered_edgelist,
+ width=filtered_edge_widths,
+ )
if plot_labels:
nx.draw_networkx_labels(
@@ -331,13 +345,13 @@ def plot_network_highlighted_clusters(
font_color=".8",
labels={node: str(node) for node in filtered_nodelist},
)
-
+
for i in range(n_communities):
if len(partition[i]) > 0:
if plot_overlaps:
- size = (n_communities - i) * node_size
+ size = (n_communities - i) * node_size
else:
- size = node_size
+ size = node_size
fig = nx.draw_networkx_nodes(
graph,
position,
@@ -346,24 +360,30 @@ def plot_network_highlighted_clusters(
node_color=[cmap(_norm(i))],
)
fig.set_edgecolor("k")
-
+
# Plotting highlighted clusters
for i, community in enumerate(partition):
if len(community) > 0:
# Extracting coordinates of community nodes
x_values = [position[node][0] for node in community]
y_values = [position[node][1] for node in community]
-
- min_x, max_x = min(x_values) , max(x_values)
- min_y, max_y = min(y_values) , max(y_values)
+
+ min_x, max_x = min(x_values), max(x_values)
+ min_y, max_y = min(y_values), max(y_values)
# Create a polygon using the min and max coordinates
- polygon = Polygon([(min_x, min_y), (max_x, min_y), (max_x, max_y), (min_x, max_y)],
- edgecolor=cmap(_norm(i)), facecolor=cmap(_norm(i)), alpha=0.3)
+ polygon = Polygon(
+ [(min_x, min_y), (max_x, min_y), (max_x, max_y), (min_x, max_y)],
+ edgecolor=cmap(_norm(i)),
+ facecolor=cmap(_norm(i)),
+ alpha=0.3,
+ )
plt.gca().add_patch(polygon)
-
+
# Extracting edges intra-community
- intra_community_edges = [(u, v) for u, v in graph.edges() if u in community and v in community]
+ intra_community_edges = [
+ (u, v) for u, v in graph.edges() if u in community and v in community
+ ]
# Plot edges intra-community with the color of the community and increased width
nx.draw_networkx_edges(
@@ -468,6 +488,7 @@ def calculate_cluster_sizes(partition: NodeClustering) -> Union[int, dict]:
else:
return cluster_sizes # Elements have different values, return the dictionary
+
def plot_community_graph(
graph: object,
partition: NodeClustering,
diff --git a/conda/meta.yaml b/conda/meta.yaml
index 8cbe7910..6bad2910 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -1,5 +1,5 @@
{% set name = "cdlib" %}
-{% set version = "0.3.1" %}
+{% set version = "0.4.0" %}
package:
name: "{{ name|lower }}"
@@ -32,6 +32,7 @@ requirements:
- thresholdclustering
- python-Levenshtein
- setuptools
+ - plotly
build:
- python
- setuptools
@@ -57,6 +58,7 @@ requirements:
- dynetx
- thresholdclustering
- python-Levenshtein
+ - plotly
about:
home: "https://github.com/GiulioRossetti/cdlib"
diff --git a/docs/conf.py b/docs/conf.py
index 6bc1f766..13919412 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,10 +16,13 @@
import sys, os
import sphinx_rtd_theme
-sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
-from cdlib import __version__
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+try:
+ from cdlib import __version__
+except ImportError:
+ __version__ = "0.4.0"
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
version = __version__
@@ -27,19 +30,16 @@
release = version
html_theme_options = {
- 'collapse_navigation': False,
- 'display_version': False,
+ "collapse_navigation": False,
+ "display_version": False,
"navigation_depth": 3,
}
# -- Project information -----------------------------------------------------
-project = 'CDlib'
-copyright = '2024, Giulio Rossetti'
-author = 'Giulio Rossetti'
-
-# The full version, including alpha/beta/rc tags
-release = '0.3.1'
+project = "CDlib"
+copyright = "2024, Giulio Rossetti"
+author = "Giulio Rossetti"
autodoc_mock_imports = [
"graph_tool.all",
@@ -99,6 +99,8 @@
"scipy.stats",
"clusim.sim",
"clusim.clustering",
+ "plotly",
+ "plotly.graph_objects",
]
# -- General configuration ---------------------------------------------------
@@ -114,24 +116,23 @@
]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-#html_theme = 'alabaster'
+# html_theme = 'alabaster'
html_logo = "cdlib_new.png"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
\ No newline at end of file
+html_static_path = ["_static"]
diff --git a/docs/installing.rst b/docs/installing.rst
index 830497a3..facf516b 100644
--- a/docs/installing.rst
+++ b/docs/installing.rst
@@ -77,7 +77,7 @@ graph-tool
----------
``CDlib`` integrates the support for SBM models offered by ``graph-tool``.
-To install it, refer to the official `documentation `_ and install the conda-forge version of the package (or the deb version if in a *nix system).
+To install it, refer to the official `documentation `_ and install the conda-forge version of the package (or the deb version if on a Unix system).
------
ASLPAw
diff --git a/docs/reference/algorithms.rst b/docs/reference/algorithms.rst
index 3d5a6d6d..4b6c21df 100644
--- a/docs/reference/algorithms.rst
+++ b/docs/reference/algorithms.rst
@@ -1,40 +1,27 @@
-==============================
-Community Discovery algorithms
-==============================
+==========================
+Static Community Discovery
+==========================
``CDlib`` collects implementations of several Community Discovery algorithms.
-To maintain the library organization as clean and resilient to changes as possible, the exposed algorithms are grouped following a simple rationale:
-
-1. Algorithms designed for static networks, and
-2. Algorithms designed for dynamic networks.
-
-Moreover, within each category, ``CDlib`` groups together approaches sharing the same high-level characteristics.
-
-In particular, static algorithms are organized into:
-
-- Those searching for a *crisp* partition of the node-set;
-- Those searching for an *overlapping* clustering of the node-set;
-- Those that search for a *fuzzy* partition of the node-set;
-- Those that cluster *edges*;
-- Those that are designed to partition *bipartite* networks;
-- Those that are designed to cluster *feature-rich* (node attributed) networks;
-- Those that search for *antichains* in DAG (directed acyclic graphs).
-
-Dynamic algorithms, conversely, are organized to resemble the taxonomy proposed in [Rossetti18]_
-
-- Instant Optimal,
-- Temporal Trade-off
-
-This documentation follows the same rationale.
+To maintain the library organization as clean and resilient to changes as possible, the exposed algorithms are grouped as:
.. toctree::
:maxdepth: 1
cd_algorithms/node_clustering.rst
cd_algorithms/edge_clustering.rst
- cd_algorithms/temporal_clustering.rst
+Moreover, node clustering algorithms are further divided to take into account the type of partition they search for:
+
+- *Crisp* partition (i.e., hard clustering);
+- *Overlapping* clustering (i.e., a node can belong to multiple communities);
+- *Fuzzy* partition (i.e., soft clustering);
+- *Bipartite* clustering (i.e., clustering of bipartite networks);
+- *Feature-rich* (node attributed) clustering (i.e., clustering of attributed networks leveraging both topology and node features);
+- *Antichains* clustering in DAG (directed acyclic graphs).
+
+For each algorithm, the documentation provides a brief description, the list of parameters, and the reference to the original paper.
----------------
Ensemble Methods
@@ -49,17 +36,3 @@ Learn how to (i) pool multiple algorithms on the same network, (ii) perform fitn
:maxdepth: 1
ensemble.rst
-
--------
-Summary
--------
-
-If you need a summary of the available algorithms and their properties (accepted graph types, community characteristics, computational complexity), refer to:
-
-.. toctree::
- :maxdepth: 1
-
- cd_algorithms/algorithms.rst
-
-
-.. [Rossetti18] Rossetti, Giulio, and Rémy Cazabet. "Community discovery in dynamic networks: a survey." ACM Computing Surveys (CSUR) 51.2 (2018): 1-37.
\ No newline at end of file
diff --git a/docs/reference/benchmark.rst b/docs/reference/benchmark.rst
index 7739e107..eb2bc309 100644
--- a/docs/reference/benchmark.rst
+++ b/docs/reference/benchmark.rst
@@ -22,7 +22,7 @@ All generators return a tuple: (``networkx.Graph``, ``cdlib.NodeClustering``)
.. autosummary::
- :toctree: bench/
+ :toctree: generated/
GRP
LFR
@@ -33,7 +33,7 @@ All generators return a tuple: (``networkx.Graph``, ``cdlib.NodeClustering``)
Benchmarks for node-attributed static networks.
.. autosummary::
- :toctree: bench/
+ :toctree: generated/
XMark
@@ -46,7 +46,7 @@ Time-evolving network topologies with planted community life cycles.
All generators return a tuple: (``dynetx.DynGraph``, ``cdlib.TemporalClustering``)
.. autosummary::
- :toctree: bench/
+ :toctree: generated/
RDyn
diff --git a/docs/reference/cd_algorithms/algorithms.rst b/docs/reference/cd_algorithms/algorithms.rst
deleted file mode 100644
index 720202cb..00000000
--- a/docs/reference/cd_algorithms/algorithms.rst
+++ /dev/null
@@ -1,201 +0,0 @@
-=================
-Algorithms' Table
-=================
-
-The following table shows an up-to-date list of the Community Detection algorithms made available within ``cdlib``.
-
-Algorithms are listed in alphabetical order along with:
-
-- a few additional information on the graph typologies they handle, and
-- the main expected characteristics of the clustering they produce,
-- (when available) the theoretical computational complexity estimated by their authors.
-
-Apart from a few reported exceptions, all algorithms are assumed to work on undirected and unweighted graphs.
-
-**Complexity notation.** When discussing the time complexity, the following notation is assumed:
-
-- *n*: number of nodes
-- *m*: number of edges
-- *k*: number of iterations
-- *c*: number of communities
-- *d*: average node degree
-
-+--------------------------------+-------------------------------------------------------------+--------------------------------------------------+-----------------+
-| | Network | Communities | Complexity |
-| Algorithm +-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| | Directed | Weighted | Bipartite | Feature-Rich | Temporal | Crisp | Overlaps | Nested | Fuzzy | Hierarchical | Time |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| agdl | x | x | | | | x | | | | | O(n^2) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| angel | | | | | | | x | | | | O(n) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| aslpaw | | | | | | | x | | | | O(kn) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| async_fluid | | | | | | x | | | | | O(m) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| belief | | | | | | x | | | | | O(kn) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| big_clam | | | | | | x | x | x | | | O(n) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| bimlpa | | | x | | | x | | | | | O(m) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| chinesewhispers | | x | | | | x | | | | | O(km) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| condor | | | x | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| conga | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| congo | | | | | | | x | | | | O(nm^2) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| core_expansion | | | | | | | x | | | | O(nlogn) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| cpm | | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| CPM_bipartite | | | x | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| coach | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| danmf | | x | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| dcs | | x | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| demon | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| der | | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| dpclus | | x | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| edmot | | x | x | | | x | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| ebgc | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| ego_networks | | | | | | | x | | | | O(m) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| egonet_splitter | | | | | | | x | | | | O(m^3/2 ) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| eigenvector | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| em | x | | | | | x | x | | x | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| endntm | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| eva | | | | x | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| frc_fgsn | | | x | | | | x | | x | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| ga | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| gdmp2 | x | | x | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| gemsec | x | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| girvan_newman | | | | | | x | | | | x | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| graph_entropy | | x | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| greedy_modularity | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| head_tail | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| hierarchical_link_communities | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| ilouvain | | | | x | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| infomap | x | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| infomap_bipartite | x | x | x | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| ipca | | x | | | | x | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| kclique | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| kcut | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| label_propagation | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| lais2 | | | | | | | x | | | | O(cm + n) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| leiden | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| lemon | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| lfm | | | | | | | x | | | x | O(n^2 logn) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| louvain | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| lpam | | | | | | | x | | | | O(2^m) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| lpanni | | | | | | | x | | | | O(n) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| lswl | | x | | | | x | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| lswl_plus | | x | | | | x | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| markov_clustering | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| mcode | | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| mnmf | | | | | | | x | | | | O(n^2*m+n^2*k) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| mod_m | | | | | | x | | | | | O(nd) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| mod_r | | | | | | x | | | | | O(nd) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| node_perception | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| multicom | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| nnsed | | | | | | | x | | | | O(kn^2) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| overlapping_seed_set_expansion | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| paris | | x | | | | x | | | | x | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| percomvc | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| principled_clustering | | | | | | | x | | x | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| pycombo | | x | | | | x | | | | | O(n^2 logc) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| rb_pots | x | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| rber_pots | | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| ricci_community | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| r_spectral_clustering | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| sbm_dl | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| sbm_dl_nested | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| scan | | | | | | x | | | | | O(m) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| scd | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| spectral | | | x | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| significance_communities | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| sibilarity_antichain | x (DAG) | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| slpa | | | | | | | x | | | | O(kn) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| spinglass | | | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| surprise_communities | x | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| symmnmf | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| threshold_clustering | x | x | | | | x | | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| tiles | | | | | x | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| umstmo | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| walkscan | | | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| walktrap | | | | | | x | | | | | O(n^2 logn) |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
-| wCommunity | | x | | | | | x | | | | |
-+--------------------------------+-----------+----------+-----------+--------------+-----------+-------+----------+--------+-------+--------------+-----------------+
\ No newline at end of file
diff --git a/docs/reference/cd_algorithms/edge_clustering.rst b/docs/reference/cd_algorithms/edge_clustering.rst
index e2833a74..3b0765e1 100644
--- a/docs/reference/cd_algorithms/edge_clustering.rst
+++ b/docs/reference/cd_algorithms/edge_clustering.rst
@@ -13,7 +13,7 @@ They return as result a ``EdgeClustering`` object instance.
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
hierarchical_link_community
diff --git a/docs/reference/cd_algorithms/node_clustering.rst b/docs/reference/cd_algorithms/node_clustering.rst
index 306260d0..8b72d916 100644
--- a/docs/reference/cd_algorithms/node_clustering.rst
+++ b/docs/reference/cd_algorithms/node_clustering.rst
@@ -1,17 +1,12 @@
-==========================
-Static Community Discovery
-==========================
-
----------------
+===============
Node Clustering
----------------
+===============
Algorithms falling in this category generate communities composed of nodes.
The communities can represent neat, *crisp*, partitions and *overlapping* or even *fuzzy* ones.
.. note::
The following lists are aligned to CD methods available in the *GitHub main branch* of `CDlib`_.
- In particular, the following algorithms are not yet released in the packaged version of the library: coach, mcode, ipca, dpclus, graph_entropy, ebgc, r_spectral_clustering.
.. automodule:: cdlib.algorithms
@@ -26,7 +21,7 @@ As a result, methods in this subclass return a ``NodeClustering`` object instanc
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
agdl
async_fluid
@@ -77,7 +72,7 @@ A clustering is said to be *overlapping* if any generic node can be assigned to
As a result, methods in this subclass return a ``NodeClustering`` object instance.
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
aslpaw
angel
@@ -117,7 +112,7 @@ A clustering is *fuzzy* if each node can belong (with a different degree of like
As a result, methods in this subclass return a ``FuzzyNodeClustering`` object instance.
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
frc_fgsn
principled_clustering
@@ -130,7 +125,7 @@ Node Attribute
As a result, methods in this subclass return a ``AttrNodeClustering`` object instance.
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
eva
ilouvain
@@ -143,7 +138,7 @@ Bipartite Graph Communities
As a result, methods in this subclass return a ``BiNodeClustering`` object instance.
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
bimlpa
condor
@@ -159,7 +154,7 @@ Antichain Communities
Methods in this subclass are designed to extract communities from Directed Acyclic Graphs (DAG) and return, as a result, a ``NodeClustering`` object instance.
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
siblinarity_antichain
@@ -172,7 +167,7 @@ Algorithms falling in this category generate communities composed of edges.
They return, as a result, a ``EdgeClustering`` object instance.
.. autosummary::
- :toctree: algs/
+ :toctree: ../generated/
hierarchical_link_community
diff --git a/docs/reference/classes.rst b/docs/reference/classes.rst
index 52203169..49b2afdc 100644
--- a/docs/reference/classes.rst
+++ b/docs/reference/classes.rst
@@ -27,10 +27,7 @@ Refer to the following documentation for a complete overview of the methods expo
classes/edge_clustering.rst
classes/temporal_clustering.rst
-
-------------------------------------------------
-Using Clustering objects with your algorithm
-------------------------------------------------
+.. note::
I have a clustering obtained by an algorithm not included in ``CDlib``. Can I load it in a Clustering object to leverage your library's evaluation and visualization facilities?
diff --git a/docs/reference/classes/lifecycle.rst b/docs/reference/classes/lifecycle.rst
new file mode 100644
index 00000000..6e399a5b
--- /dev/null
+++ b/docs/reference/classes/lifecycle.rst
@@ -0,0 +1,12 @@
+================
+LifeCycle Object
+================
+
+The ``LifeCycle`` class represents the life cycle of temporal communities extracted from a dynamic network.
+It stores the in/out flows of nodes between communities and the from/to events those flows generate.
+
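+A minimal usage sketch (assuming a previously built ``TemporalClustering`` instance ``tc``, as shown in the events documentation):
+
+.. code-block:: python
+
+    from cdlib import LifeCycle
+
+    lc = LifeCycle(tc)           # wrap a temporal clustering
+    lc.compute_events("facets")  # detect community events
+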
+.. currentmodule:: cdlib
+.. autoclass:: LifeCycle
+ :members:
+ :inherited-members:
+
diff --git a/docs/reference/classes/node_clustering.rst b/docs/reference/classes/node_clustering.rst
index bda6f8e8..855a8f6c 100644
--- a/docs/reference/classes/node_clustering.rst
+++ b/docs/reference/classes/node_clustering.rst
@@ -70,4 +70,21 @@ Comparing Node Clusterings
NodeClustering.adjusted_rand_index
NodeClustering.adjusted_mutual_information
NodeClustering.variation_of_information
+ NodeClustering.partition_closeness_simple
+ NodeClustering.ecs
+ NodeClustering.jaccard_index
+ NodeClustering.rand_index
+ NodeClustering.fowlkes_mallows_index
+ NodeClustering.classification_error
+ NodeClustering.czekanowski_index
+ NodeClustering.dice_index
+ NodeClustering.sorensen_index
+ NodeClustering.rogers_tanimoto_index
+ NodeClustering.southwood_index
+ NodeClustering.mi
+ NodeClustering.rmi
+ NodeClustering.geometric_accuracy
+ NodeClustering.overlap_quality
+ NodeClustering.sample_expected_sim
+
diff --git a/docs/reference/classes/temporal_clustering.rst b/docs/reference/classes/temporal_clustering.rst
index 537403db..d9bc1a6b 100644
--- a/docs/reference/classes/temporal_clustering.rst
+++ b/docs/reference/classes/temporal_clustering.rst
@@ -38,11 +38,4 @@ Evaluating Node Clustering
TemporalClustering.clustering_stability_trend
-Matching temporal clustering
-----------------------------
-
-.. autosummary::
-
- TemporalClustering.community_matching
- TemporalClustering.lifecycle_polytree
diff --git a/docs/reference/evaluation.rst b/docs/reference/evaluation.rst
index 3fe7f235..c8e60d5c 100644
--- a/docs/reference/evaluation.rst
+++ b/docs/reference/evaluation.rst
@@ -25,7 +25,7 @@ Fitness functions allow to summarize the characteristics of a computed set of co
.. automodule:: cdlib.evaluation
.. autosummary::
- :toctree: eval/
+ :toctree: generated/
avg_distance
avg_embeddedness
@@ -53,7 +53,7 @@ Fitness functions allow to summarize the characteristics of a computed set of co
Among the fitness function, a well-defined family of measures is the Modularity-based one:
.. autosummary::
- :toctree: eval/
+ :toctree: generated/
erdos_renyi_modularity
link_modularity
@@ -66,7 +66,7 @@ Among the fitness function, a well-defined family of measures is the Modularity-
Some measures will return an instance of ``FitnessResult`` that takes together min/max/mean/std values of the computed index.
.. autosummary::
- :toctree: eval/
+ :toctree: generated/
FitnessResult
@@ -78,7 +78,7 @@ It is often useful to compare different graph partitions to assess their resembl
``cdlib`` implements the following partition comparisons scores:
.. autosummary::
- :toctree: eval/
+ :toctree: generated/
adjusted_mutual_information
mi
@@ -110,7 +110,7 @@ It is often useful to compare different graph partitions to assess their resembl
Some measures will return an instance of ``MatchingResult`` that takes together the computed index's mean and standard deviation values.
.. autosummary::
- :toctree: eval/
+ :toctree: generated/
MatchingResult
@@ -153,19 +153,6 @@ All details on remote datasets can be found on the dedicated page.
datasets.rst
-^^^^^^^^^^^^^^^^^^
-Ranking Algorithms
-^^^^^^^^^^^^^^^^^^
-
-Once a set of alternative clusterings have been extracted from a given network, is there a way to select the *best* one given a set of target fitness functions?
-
-``cdlib`` exposes a few standard techniques to address such an issue: all details can be found on the dedicated documentation page.
-
-.. toctree::
- :maxdepth: 1
-
- validation.rst
-
.. _`cdlib`: https://github.com/GiulioRossetti/cdlib
.. [Peel17] Peel, Leto, Daniel B. Larremore, and Aaron Clauset. "The ground truth about metadata and community detection in networks." Science Advances 3.5 (2017): e1602548.
\ No newline at end of file
diff --git a/docs/reference/events.rst b/docs/reference/events.rst
new file mode 100644
index 00000000..9df3e4d1
--- /dev/null
+++ b/docs/reference/events.rst
@@ -0,0 +1,332 @@
+==============================
+Community Events and LifeCycle
+==============================
+
+Community events describe the changes in the community structure of a network over time.
+The community structure of a network can change due to the arrival or departure of nodes, the creation or dissolution of communities, or the merging or splitting of communities.
+
+The ``cdlib`` library provides a set of tools to analyze the evolution of communities over time, including the detection of community events and the analysis of community life cycles.
+
+The interface of the library is designed to be as simple as possible, allowing users to easily analyze the evolution of communities in their networks.
+
+Check the ``LifeCycle`` class for more details:
+
+.. toctree::
+ :maxdepth: 1
+
+ classes/lifecycle.rst
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Clustering with Explicit LifeCycle
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Some dynamic community detection algorithms (e.g., Temporal trade-off ones) provide an explicit representation of the life cycle of communities.
+In this case it is not necessary to detect community events as a post-processing step, since the life cycle of communities is already available.
+
+To analyze such pre-computed events, apply the following snippet:
+
+.. code-block:: python
+
+ from cdlib import LifeCycle
+ from cdlib import algorithms
+    import dynetx as dn
+    import networkx as nx
+
+ dg = dn.DynGraph()
+ for x in range(10):
+ g = nx.erdos_renyi_graph(200, 0.05)
+ dg.add_interactions_from(list(g.edges()), t=x)
+ coms = algorithms.tiles(dg, 2)
+
+ lc = LifeCycle(coms)
+ lc.compute_events_from_explicit_matching()
+
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Clustering without Explicit LifeCycle
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In case the dynamic community detection algorithm does not provide an explicit representation of the life cycle of communities, the library provides a set of tools to detect community events and analyze community life cycles as a post-processing step.
+In particular, the library allows users to identify events following four different strategies:
+
+- **Facets** events definition [Failla24]_
+- **Greene** events definition [Greene2010]_
+- **Asur** events definition [Asur2009]_
+- **Custom** events definition
+
+The first three strategies are based on definitions of community events proposed in the literature, while the last one allows users to define their own events.
+
+To apply one of the first three strategies, use the following snippet:
+
+.. code-block:: python
+
+    from cdlib import LifeCycle, TemporalClustering
+    from cdlib import algorithms
+    from networkx.generators.community import LFR_benchmark_graph
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets") # or "greene" or "asur"
+
+.. note::
+    Each strategy has its own parameters, which can be specified by passing a dictionary to the ``compute_events`` method.
+    In particular, the ``facets`` strategy requires the ``min_branch_size`` parameter (default 1), while ``greene`` and ``asur`` require the ``threshold`` parameter (default 0.1).
+
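+A hedged sketch (the dictionary form below follows the note above; the exact way the dictionary is handed to ``compute_events`` is an assumption here):
+
+.. code-block:: python
+
+    events.compute_events("facets", {"min_branch_size": 2})  # assumed dictionary argument
+    # events.compute_events("greene", {"threshold": 0.2})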
+
+To define custom events, use the following snippet:
+
+.. code-block:: python
+
+    from cdlib import LifeCycle, TemporalClustering
+    from cdlib import algorithms
+    from networkx.generators.community import LFR_benchmark_graph
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ jaccard = lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y))
+ events.compute_events_with_custom_matching(jaccard, threshold=0.3, two_sided=True)
+
+In the above snippet, the ``jaccard`` function defines the similarity between two communities.
+The ``threshold`` parameter sets the minimum similarity required to consider one community an evolution of the other.
+Changing the similarity function and the threshold allows users to define their own matching strategies, as sketched below.
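+
+.. code-block:: python
+
+    # An alternative matcher sketch: any callable taking two node collections works.
+    # The overlap coefficient below is an illustrative choice, not a library default.
+    overlap = lambda x, y: len(set(x) & set(y)) / min(len(set(x)), len(set(y)))
+    events.compute_events_with_custom_matching(overlap, threshold=0.5, two_sided=True)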
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+Analyzing Events and Flows
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Once the community events have been detected, the library provides a set of tools to analyze them.
+Each event is characterized by a set of properties, such as the type of event, the communities involved, the nodes involved, and the time of occurrence.
+
+.. note::
+
+ The library assigns a unique identifier to each community of the form ``t_c`` where ``t`` is the time of occurrence and ``c`` is the community identifier.
+    E.g., ``2_3`` identifies community ``3`` at time ``2``.
+
+Each tracking strategy defines a different set of events (e.g., creation, dissolution, merging, splitting).
+However, ``cdlib`` generalizes the concept of event by breaking it down into four components. For each temporal community ``t_c`` it provides access to:
+
+- **In flow**: the set of nodes that have entered the community ``t_c`` from clusters of time ``t-1``;
+- **Out flow**: the set of nodes that will leave the community ``t_c`` at time ``t+1``;
+- **From Events**: the set of events that generated the community observed at ``t`` and that involved clusters at time ``t-1``;
+- **To Events**: the set of events that community ``t_c`` starts at time ``t`` and that will affect clusters at time ``t+1``.
+
+All this information can be summarized in a community temporal-dependency digraph called a ``polytree``.
+
+Here is an example of how to analyze community events and flows:
+
+.. code-block:: python
+
+    from cdlib import LifeCycle, TemporalClustering, algorithms
+ from networkx.generators.community import LFR_benchmark_graph
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets") # or "greene" or "asur"
+    event_types = events.get_event_types()  # list the event types defined by the chosen method (here, "facets")
+
+ ev = events.get_event("1_2") # to compute events for all communities use the get_events() method
+ print(ev.out_flow) # to get the out flow of the community 1_2
+ print(ev.in_flow) # to get the in flow of the community 1_2
+ print(ev.from_event) # to get the from events of the community 1_2
+ print(ev.to_event) # to get the to events of the community 1_2
+
+    out_flow = events.analyze_flow("1_2", "+")  # to analyze all communities at once use the analyze_flows() method
+    in_flow = events.analyze_flow("1_2", "-")
+
+Each event is characterized by its degree of importance for the current status of the community.
+In particular, ``facets`` events are fuzzy (more than one can occur at the same time), while ``greene`` and ``asur`` events are crisp (only one can occur at a time).
+
+.. note::
+    Following the ``facets`` terminology, ``analyze_flow`` and ``analyze_flows`` return a dictionary describing the flow in terms of its Unicity, Identity, and Outflow.
+    For a detailed description of these measures refer to [Failla24]_.
+
+In addition, if the temporal network comes with attributes associated with the nodes (either dynamically changing or not - e.g., political leanings), the library provides a set of tools to analyze the typicality of the events.
+
+Setting and retrieving node attributes is straightforward:
+
+.. code-block:: python
+
+    from cdlib import LifeCycle, TemporalClustering, algorithms
+    from networkx.generators.community import LFR_benchmark_graph
+    import random
+
+ def random_leaning():
+ attrs = {}
+ for i in range(250): # 250 nodes
+ attrs[i] = {}
+ for t in range(10): # 10 time steps
+ attrs[i][t] = random.choice(["left", "right"])
+ return attrs
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets") # or "greene" or "asur"
+ events.set_attribute(random_leaning(), "political_leaning")
+ attrs = events.get_attribute("political_leaning")
+
+ events.analyze_flow("1_1", "+", attr="political_leaning") # to analyze the flow of political leaning in the community 1_1
+
+Attributes are stored as a dictionary of dictionaries where the first key is the node id and the second key is the time step.
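+
+For instance, the following sketch (with made-up values) encodes the leaning of nodes ``0`` and ``1`` over two time steps:
+
+.. code-block:: python
+
+    attrs = {
+        0: {0: "left", 1: "right"},   # node 0: value at t=0 and t=1
+        1: {0: "right", 1: "right"},  # node 1
+    }
+    events.set_attribute(attrs, "political_leaning")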
+
+If such information is available, the ``analyze_flow`` method will integrate in its analysis an evaluation of flow-attribute entropy.
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Visualizing Events and Flows
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The library provides a set of tools to visualize the events and flows detected in the community structure of a network.
+
+.. note::
+
+    The library uses the ``networkx`` library to represent the community structure of a network and the ``matplotlib`` / ``plotly`` libraries to visualize it.
+
+Here is an example of how to visualize community events, flows, and the polytree:
+
+.. code-block:: python
+
+    from cdlib import LifeCycle, TemporalClustering, algorithms
+    from cdlib.viz import (
+        plot_flow,
+        plot_event_radar,
+        plot_event_radars,
+        typicality_distribution,
+    )
+    from networkx.generators.community import LFR_benchmark_graph
+    import networkx as nx
+    import matplotlib.pyplot as plt
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets") # or "greene" or "asur"
+
+ fig = plot_flow(events)
+ fig.show()
+
+ fig = plot_event_radar(events, "1_2", direction="+") # only out events
+ fig.show()
+
+ fig = plot_event_radars(events, "1_2") # both in and out events
+ fig.show()
+
+ fig = typicality_distribution(events, "+")
+ fig.show()
+
+    dg = events.polytree()
+    nx.draw_networkx(dg, with_labels=True)  # draws on the current matplotlib axes
+    plt.show()
+
+For a detailed description of the available methods and parameters, check the ``Visual Analytics`` section of the ``cdlib`` reference guide.
+
+^^^^^^^^^^^^^^^^
+Validating Flows
+^^^^^^^^^^^^^^^^
+
+The library provides a set of tools to statistically validate the observed flows against null models.
+
+Here is an example of how to validate the observed flows:
+
+.. code-block:: python
+
+    from cdlib import LifeCycle, TemporalClustering, algorithms
+    from networkx.generators.community import LFR_benchmark_graph
+
+ tc = TemporalClustering()
+ for t in range(0, 10):
+ g = LFR_benchmark_graph(
+ n=250,
+ tau1=3,
+ tau2=1.5,
+ mu=0.1,
+ average_degree=5,
+ min_community=20,
+ seed=10,
+ )
+ coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
+ tc.add_clustering(coms, t)
+
+ events = LifeCycle(tc)
+ events.compute_events("facets") # or "greene" or "asur"
+
+    # validate the out flow of community 1_2; iterations sets the number of randomizations
+    cf = events.flow_null("1_2", "+", iterations=1000)
+    vf = events.all_flows_null("+", iterations=1000)  # validate all out flows
+
+Both validation methods return a dictionary keyed by set identifier and valued by mean, std, and p-value of the observed flow against the null model.
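+
+A typical post-processing step keeps only the statistically significant flows. The sketch below assumes the per-flow dictionary exposes ``mean``, ``std``, and ``p-value`` keys, matching the description above:
+
+.. code-block:: python
+
+    # sketch: retain flows unlikely under the null model (assumed key names)
+    significant = {cid: stats for cid, stats in vf.items() if stats["p-value"] < 0.05}
+    print(significant)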
+
+.. automodule:: cdlib.lifecycles
+ :members:
+ :undoc-members:
+
+.. autosummary::
+ :toctree: generated/
+
+ flow_null
+ all_flows_null
+
+
+.. [Failla24] Andrea Failla, Rémy Cazabet, Giulio Rossetti, Salvatore Citraro. "Redefining Event Types and Group Evolution in Temporal Data." arXiv preprint arXiv:2403.06771 (2024).
+
+.. [Asur2009] Sitaram Asur, Srinivasan Parthasarathy, Duygu Ucar. "An event-based framework for characterizing the evolutionary behavior of interaction graphs." ACM Transactions on Knowledge Discovery from Data (TKDD) 3.4 (2009): 1-36.
+
+.. [Greene2010] Derek Greene, Donal Doyle, Padraig Cunningham. "Tracking the evolution of communities in dynamic social networks." 2010 International Conference on Advances in Social Networks Analysis and Mining. IEEE, 2010.
\ No newline at end of file
diff --git a/docs/reference/readwrite.rst b/docs/reference/readwrite.rst
index fb2463b7..06a137c8 100644
--- a/docs/reference/readwrite.rst
+++ b/docs/reference/readwrite.rst
@@ -2,7 +2,11 @@
Input-Output
************
-Functions to save/load ``cdlib`` communities to/from file.
+Functions to save/load ``cdlib`` communities and events to/from file.
+
+^^^^^^^^^^^^^
+Community I/O
+^^^^^^^^^^^^^
----------
CSV format
@@ -32,6 +36,22 @@ JSON format allows the storage/loading of community discovery algorithm results
:toctree: generated/
read_community_json
+ read_community_from_json_string
write_community_json
-.. note:: JSON formatting allows only saving/retrieving all kinds of Clustering objects and maintaining all their metadata - except for the graph object instance.
\ No newline at end of file
+.. note:: JSON formatting allows only saving/retrieving all kinds of Clustering objects and maintaining all their metadata - except for the graph object instance.
+
+^^^^^^^^^^^^^^^^^^^^
+Community Events I/O
+^^^^^^^^^^^^^^^^^^^^
+
+Events are a fundamental concept in the context of dynamic community discovery. The following methods allow you to read/write events to/from JSON.
+
+.. autosummary::
+ :toctree: generated/
+
+ read_lifecycle_json
+ write_lifecycle_json
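+
+A minimal round-trip sketch (assuming ``events`` is a previously computed ``LifeCycle`` object; the signatures below are assumptions, only the function names are documented here):
+
+.. code-block:: python
+
+    from cdlib import readwrite
+
+    # persist the detected events, then reload them
+    readwrite.write_lifecycle_json(events, "events.json")
+    events = readwrite.read_lifecycle_json("events.json")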
+
+
+
diff --git a/docs/reference/reference.rst b/docs/reference/reference.rst
index 1f2889ed..3c6ccf36 100644
--- a/docs/reference/reference.rst
+++ b/docs/reference/reference.rst
@@ -1,8 +1,8 @@
*********
-Reference
+API Guide
*********
-``cdlib``comprises several modules, each fulfilling a different task related to community detection.
+``cdlib`` comprises several modules, each fulfilling a different task related to community detection.
.. toctree::
@@ -10,7 +10,10 @@ Reference
classes.rst
algorithms.rst
+ temporal_clustering.rst
+ events.rst
evaluation.rst
+ validation.rst
viz.rst
readwrite.rst
utils.rst
\ No newline at end of file
diff --git a/docs/reference/cd_algorithms/temporal_clustering.rst b/docs/reference/temporal_clustering.rst
similarity index 83%
rename from docs/reference/cd_algorithms/temporal_clustering.rst
rename to docs/reference/temporal_clustering.rst
index f6a5f89e..fe02acf4 100644
--- a/docs/reference/cd_algorithms/temporal_clustering.rst
+++ b/docs/reference/temporal_clustering.rst
@@ -4,6 +4,13 @@ Dynamic Community Discovery
Algorithms falling in this category generate communities that evolve as time goes by.
+Dynamic algorithms are organized to resemble the taxonomy proposed in [Rossetti18]_:
+
+- Instant Optimal;
+- Temporal Trade-off.
+
+For all details on the available methods to extract and manipulate dynamic communities, please refer to the ``TemporalClustering`` documentation.
+
.. automodule:: cdlib.algorithms
@@ -34,14 +41,7 @@ Here is an example of a two-step built on top of Louvain partitions of a dynamic
coms = algorithms.louvain(g) # here any CDlib algorithm can be applied
tc.add_clustering(coms, t)
-For what concerns the second stage (snapshots' node clustering matching), it is possible to parametrize the set similarity function as follows (example made with a standard Jaccard similarity):
-
-.. code-block:: python
-
- jaccard = lambda x, y: len(set(x) & set(y)) / len(set(x) | set(y))
- matches = tc.community_matching(jaccard, two_sided=True)
-
-For all details on the available methods to extract and manipulate dynamic communities, please refer to the ``TemporalClustering`` documentation.
+Regarding the second stage (snapshots' node clustering matching), refer to the ``Community Events and LifeCycle`` section of the ``cdlib`` documentation.
^^^^^^^^^^^^^^^^^^
Temporal Trade-Off
@@ -54,7 +54,13 @@ Dynamic Community Discovery algorithms falling into this category can be describ
- Initialization: find communities for the initial state of the network;
- Update: find communities at step t using graph at t and past information for each incoming step.
+Currently ``cdlib`` features the following Temporal Trade-off algorithms:
+
.. autosummary::
- :toctree: algs/
+ :toctree: generated/
tiles
+
+
+
+.. [Rossetti18] Rossetti, Giulio, and Rémy Cazabet. "Community discovery in dynamic networks: a survey." ACM Computing Surveys (CSUR) 51.2 (2018): 1-37.
\ No newline at end of file
diff --git a/docs/reference/validation.rst b/docs/reference/validation.rst
index 9a6c407c..608ce8c0 100644
--- a/docs/reference/validation.rst
+++ b/docs/reference/validation.rst
@@ -1,6 +1,11 @@
-******************
-Ranking Algorithms
-******************
+*******************************
+Validate CD Algorithms Rankings
+*******************************
+
+.. note::
+
+    Once a set of alternative clusterings has been extracted from a given network, is there a way to select the *best* one given a set of target fitness functions?
+
Let us assume that you ran a set **X** of community discovery algorithms on a given graph **G** and that you computed a set **Y** of fitness scores for each of the obtained clusterings.
diff --git a/docs/reference/viz.rst b/docs/reference/viz.rst
index ed01665c..0d88fa05 100644
--- a/docs/reference/viz.rst
+++ b/docs/reference/viz.rst
@@ -36,4 +36,19 @@ Community evaluation outputs can be easily used to represent the main partition
plot_sim_matrix
plot_com_stat
plot_com_properties_relation
- plot_scoring
\ No newline at end of file
+ plot_scoring
+
+
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Dynamic Community Events Plots
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Dynamic community detection algorithms can be evaluated using the dynamic community events framework. The results can be visualized using the following functions.
+
+.. autosummary::
+ :toctree: generated/
+
+ plot_flow
+ plot_event_radar
+ plot_event_radars
+ typicality_distribution
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index 725db7f1..20ada6e4 100644
--- a/environment.yml
+++ b/environment.yml
@@ -21,3 +21,4 @@ dependencies:
- dynetx
- thresholdclustering
- python-Levenshtein
+- plotly
diff --git a/requirements.txt b/requirements.txt
index 345f9a4c..55482c09 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,18 +1,19 @@
numpy
scikit-learn
tqdm
-networkx >= 3.0
+networkx>=3.0
demon
-python-louvain >= 0.16
-scipy >= 1.10
+python-louvain>=0.16
+scipy>=1.10
pulp
seaborn
pandas
eva_lcd
bimlpa
-python-igraph >= 0.10
+python-igraph>=0.10
angelcommunity
pooch
dynetx
thresholdclustering
-python-Levenshtein
\ No newline at end of file
+python-Levenshtein
+plotly