diff --git a/cdlib/lifecycles/algorithms/event_analysis.py b/cdlib/lifecycles/algorithms/event_analysis.py index a705d84..5f86503 100644 --- a/cdlib/lifecycles/algorithms/event_analysis.py +++ b/cdlib/lifecycles/algorithms/event_analysis.py @@ -69,14 +69,30 @@ def event_weights_from_flow(analyzed_flows: dict, direction: str) -> dict: def _compute_event_scores(analyzed_flow: dict) -> list: return [ - (analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * analyzed_flow["Outflow"], - (1 - analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * analyzed_flow["Outflow"], - (analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * analyzed_flow["Outflow"], - (1 - analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * analyzed_flow["Outflow"], - (analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * (1 - analyzed_flow["Outflow"]), - (1 - analyzed_flow["Unicity"]) * analyzed_flow["Identity"] * (1 - analyzed_flow["Outflow"]), - (analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * (1 - analyzed_flow["Outflow"]), - (1 - analyzed_flow["Unicity"]) * (1 - analyzed_flow["Identity"]) * (1 - analyzed_flow["Outflow"]), + (analyzed_flow["Unicity"]) + * (1 - analyzed_flow["Identity"]) + * analyzed_flow["Outflow"], + (1 - analyzed_flow["Unicity"]) + * (1 - analyzed_flow["Identity"]) + * analyzed_flow["Outflow"], + (analyzed_flow["Unicity"]) + * analyzed_flow["Identity"] + * analyzed_flow["Outflow"], + (1 - analyzed_flow["Unicity"]) + * analyzed_flow["Identity"] + * analyzed_flow["Outflow"], + (analyzed_flow["Unicity"]) + * analyzed_flow["Identity"] + * (1 - analyzed_flow["Outflow"]), + (1 - analyzed_flow["Unicity"]) + * analyzed_flow["Identity"] + * (1 - analyzed_flow["Outflow"]), + (analyzed_flow["Unicity"]) + * (1 - analyzed_flow["Identity"]) + * (1 - analyzed_flow["Outflow"]), + (1 - analyzed_flow["Unicity"]) + * (1 - analyzed_flow["Identity"]) + * (1 - analyzed_flow["Outflow"]), ] diff --git a/cdlib/lifecycles/algorithms/measures.py 
b/cdlib/lifecycles/algorithms/measures.py index 236084b..e2aec15 100644 --- a/cdlib/lifecycles/algorithms/measures.py +++ b/cdlib/lifecycles/algorithms/measures.py @@ -75,46 +75,6 @@ def _max_second_difference(labels): return max_val - second_largest -def _berger_parker_index(labels): - """ - Dominance index, the probability of the most frequent attribute value in the set - - Args: - labels (_type_): _description_ - - Returns: - _type_: _description_ - """ - n = len(labels) - counter = Counter(labels) - probabilities = [count / n for count in counter.values()] - max_val = np.max(probabilities) - return max_val - - -def _gini_index(labels): - - n = len(labels) - counter = Counter(labels) - probabilities = [count / n for count in counter.values()] - - array = np.array(probabilities) - """Calculate the Gini coefficient of a numpy array.""" - # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif - # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm - - array = array.flatten() # all values are treated equally, arrays must be 1d - if np.amin(array) < 0: - array -= np.amin(array) # values cannot be negative - array += 0.0000001 # values cannot be 0 - array = np.sort(array) # values must be sorted - index = np.arange(1, array.shape[0] + 1) # index per array element - n = array.shape[0] # number of array elements - return (np.sum((2 * index - n - 1) * array)) / ( - n * np.sum(array) - ) # Gini coefficient - - def facet_unicity(labels: list) -> float: """ the unicity facet quantifies the extent to which a target set comes from one (=1) or multiple (->0) flows. 
def test_marginal(self):
    """
    Smoke-test the less frequently used matching / lifecycle entry points.

    Ten Louvain clusterings of an LFR benchmark graph are stored in a
    TemporalClustering, events are computed with the "facets" strategy, and
    the remaining accessors are then invoked. Only a few results are
    type-checked; the other calls are made for completeness.
    """
    temporal = TemporalClustering()
    for step in range(10):
        graph = LFR_benchmark_graph(
            n=250,
            tau1=3,
            tau2=1.5,
            mu=0.1,
            average_degree=5,
            min_community=20,
            seed=10,
        )
        # any CDlib algorithm can be applied here
        partition = algorithms.louvain(graph)
        temporal.add_clustering(partition, step)

    lifecycle = LifeCycle(temporal)
    lifecycle.compute_events("facets")

    # marginal tests (not all methods are tested since they are not of use
    # in cdlib - they are invoked for completeness)
    sliced = lifecycle.cm.slice(0, 5)
    self.assertIsInstance(
        sliced, cdlib.lifecycles.classes.matching.CommunityMatching
    )

    universe = lifecycle.cm.universe_set()
    self.assertIsInstance(universe, set)

    all_groups = list(lifecycle.cm.group_iterator())
    self.assertIsInstance(all_groups, list)
    groups_at_three = list(lifecycle.cm.group_iterator(3))
    self.assertIsInstance(groups_at_three, list)

    lifecycle.cm.filter_on_group_size(1, 100)
    lifecycle.cm.get_element_membership(1)
    lifecycle.cm.get_all_element_memberships()

    lifecycle.get_events()
    lifecycle.get_event_types()

    community_event = lifecycle.get_event("1_1")
    community_event.get_from_event()
    community_event.get_to_event()

    # module-level helpers from event_analysis, called on the same matching
    facets(lifecycle.cm, "0_2", "+")
    event_weights(lifecycle.cm, "0_2", "+")
    evn(lifecycle.cm, "0_2", "+")