From 93ab7489b2caf37394cd71c89e4217a023fd8181 Mon Sep 17 00:00:00 2001
From: ntfrgl
Date: Wed, 21 Feb 2024 05:12:18 -0800
Subject: [PATCH] MAINT: Refactor caching (#124, #148, #153)

Improve on the approach in the reverted commits fe23f23, 0166cd2:
- introduce `@Cached.method()` based on `@functools.lru_cache()`
- track mutable dependencies for cache invalidation
- fix cache clearing helper
- add behavioural spec in `tests/test_core/test_cache.py`

Adjustments in existing classes in this commit:
- define `__cache_state__()` for all subclasses of `Cached`
  (might require updates when adding further methods to the cache)
- define `@Cached.method(attrs=(...))` where appropriate
- remove obsolete class-specific caching mechanisms
- factor out cached worker: `Network.nsi_betweenness()`
- define helper: `Network.find_link_attribute()`
- define helper: `ClimateNetwork._weighted_metric()`

Adding this behaviour to classes without caching should be
straightforward. Classes that remain to be adjusted (still have a
`clear_cache()` method or subclass `Cached` without conforming to its
protocol):

- `ClimateNetwork` and its subclasses:
  The recursive interaction between `__init__()`, `_similarity_measure`
  and `_regenerate_network()` across the class hierarchy is very
  stateful and does not fit into the regular OO dependency pattern
  assumed by `Cached`. A redesign of this logic would be advisable, but
  is left for future work.
- `RecurrenceNetwork` and its subclasses, and `Surrogate`:
  Their migration to the `Cached` protocol likewise remains future
  work.
---
 src/pyunicorn/climate/climate_data.py         | 148 ++-----
 src/pyunicorn/climate/climate_network.py      |  68 ++-
 .../climate/coupled_climate_network.py        |  15 +-
 src/pyunicorn/climate/coupled_tsonis.py       |  11 +-
 .../climate/eventseries_climatenetwork.py     |   4 +-
 src/pyunicorn/climate/havlin.py               | 124 ++----
 src/pyunicorn/climate/hilbert.py              |  38 +-
 src/pyunicorn/climate/mutual_info.py          |  51 +--
 src/pyunicorn/climate/tsonis.py               |  13 +-
 src/pyunicorn/core/__init__.py                |   2 +-
 src/pyunicorn/core/cache.py                   | 179 ++++++++
 src/pyunicorn/core/data.py                    |  31 +-
 src/pyunicorn/core/geo_grid.py                |  86 +---
 src/pyunicorn/core/geo_network.py             |  45 +-
 src/pyunicorn/core/grid.py                    |  68 +--
 src/pyunicorn/core/interacting_networks.py    |   7 -
 src/pyunicorn/core/network.py                 | 418 +++++++-----------
 src/pyunicorn/core/spatial_network.py         |  44 +-
 src/pyunicorn/eventseries/event_series.py     |  40 +-
 src/pyunicorn/timeseries/_ext/numerics.pyx    |  26 +-
 .../timeseries/cross_recurrence_plot.py       | 149 +++----
 .../timeseries/joint_recurrence_plot.py       |  73 +--
 src/pyunicorn/timeseries/recurrence_plot.py   | 334 ++++++--------
 tests/test_core/test_cache.py                 | 321 ++++++++++++++
 .../test_joint_recurrence_plot.py             |   9 +-
 tests/test_timeseries/test_timeseries.py      |   4 +-
 26 files changed, 1144 insertions(+), 1164 deletions(-)
 create mode 100644 src/pyunicorn/core/cache.py
 create mode 100644 tests/test_core/test_cache.py

diff --git a/src/pyunicorn/climate/climate_data.py b/src/pyunicorn/climate/climate_data.py
index cba9e55..14f5ce2 100644
--- a/src/pyunicorn/climate/climate_data.py
+++ b/src/pyunicorn/climate/climate_data.py
@@ -16,21 +16,16 @@ Provides classes for generating and analyzing complex climate networks.
""" -# -# Import essential packages -# +from typing import Tuple, Hashable -# Import NumPy for the array object and fast numerics import numpy as np from numpy import random from ..core import Data +from ..core.cache import Cached -# -# Define class ClimateData -# -class ClimateData(Data): +class ClimateData(Data, Cached): """ Encapsulates spatio-temporal climate data. @@ -73,6 +68,9 @@ def __init__(self, observable, grid, time_cycle, anomalies=False, :arg dict window: Spatio-temporal window to select a view on the data. :arg int silence_level: The inverse level of verbosity of the object. """ + self._mut_window = 0 + """mutation count""" + Data.__init__(self, observable=observable, grid=grid, observable_name=observable_name, observable_long_name=observable_long_name, @@ -83,18 +81,15 @@ def __init__(self, observable, grid, time_cycle, anomalies=False, """(number (int)) - The annual cycle length of the data (units of samples).""" - # Set flags - self._flag_phase_mean = False - self._phase_mean = None - self.data_source = "" # If data are anomalies skip automatic calculation of anomalies - if anomalies: - self._flag_anomaly = True - self._anomaly = observable - else: - self._flag_anomaly = False + self.anomalies = anomalies + + def __cache_state__(self) -> Tuple[Hashable, ...]: + # The following attributes are assumed immutable: + # (_full_observable) + return (self._mut_window,) def __str__(self): """ @@ -102,23 +97,6 @@ def __str__(self): """ return 'ClimateData:\n' + Data.__str__(self) - def clear_cache(self): - """ - Clean up cache. - - Is reversible, since all cached information can be recalculated from - basic data. - """ - Data.clear_cache(self) - - if self._flag_phase_mean: - del self._phase_mean - self._flag_phase_mean = False - - if self._flag_anomaly: - del self._anomaly - self._flag_anomaly = False - # # Define alternative constructors # @@ -304,7 +282,8 @@ def indices_selected_months(self, selected_months): raise NotImplementedError("Currently only time cycles 12 and 360 \ are supported") - def _calculate_phase_mean(self): + @Cached.method(name="climatological mean values") + def phase_mean(self): """ Calculate mean values of observable for each phase of the annual cycle. @@ -318,35 +297,6 @@ def _calculate_phase_mean(self): :rtype: 2D Numpy array [cycle index, node index] :return: the mean values of observable for each phase of the annual cycle. - """ - if self.silence_level <= 1: - print("Calculating climatological mean values...") - - # Get raw data - observable = self.observable() - # Get time cycle - time_cycle = self.time_cycle - - # Get number of time series - N = observable.shape[1] - - # Initialize - phase_mean = np.zeros((time_cycle, N)) - - # Calculate mean value for each day (month) on each node - for i in range(time_cycle): - phase_mean[i, :] = observable[i::time_cycle, :].mean(axis=0) - - return phase_mean - - def phase_mean(self): - """ - Return mean values of observable for each phase of the annual cycle. - - For further comments, see :meth:`_calculate_phase_mean`. - - .. note:: - Only the currently selected spatio-temporal window is considered. **Example:** @@ -356,18 +306,19 @@ def phase_mean(self): [ 0.6984, 0.1106, -0.6984, -0.1106, 0.6984, 0.1106], [ 0.6984, -0.1106, -0.6984, 0.1106, 0.6984, -0.1106], [ 0.63 , -0.321 , -0.63 , 0.321 , 0.63 , -0.321 ]]) - - :rtype: 2D Numpy array [cycle index, node index] - :return: the mean values of observable for each phase of the annual - cycle. 
""" - if not self._flag_phase_mean: - self._phase_mean = self._calculate_phase_mean() - self._flag_phase_mean = True + observable = self.observable() + time_cycle = self.time_cycle + N = observable.shape[1] + phase_mean = np.zeros((time_cycle, N)) - return self._phase_mean + # Calculate mean value for each day (month) on each node + for i in range(time_cycle): + phase_mean[i, :] = observable[i::time_cycle, :].mean(axis=0) + return phase_mean - def _calculate_anomaly(self): + @Cached.method(name="daily (monthly) anomaly values") + def anomaly(self): """ Calculate anomaly time series from observable. @@ -380,53 +331,32 @@ def _calculate_anomaly(self): :rtype: 2D Numpy array [time, node index] :return: the anomalized time series. + + **Example:** + + >>> r(ClimateData.SmallTestData().anomaly()[:,0]) + array([-0.5 , -0.321 , -0.1106, 0.1106, 0.321 , + 0.5 , 0.321 , 0.1106, -0.1106, -0.321 ]) """ - if self.silence_level <= 1: - print("Calculating daily (monthly) anomaly values...") + # If data are anomalies skip automatic calculation of anomalies + if self.anomalies: + return self._full_observable - # Get raw data observable = self.observable() - # Get time cycle time_cycle = self.time_cycle - # Initialize array anomaly = np.zeros(observable.shape) # Thanks to Jakob Runge for i in range(time_cycle): sample = observable[i::time_cycle, :] anomaly[i::time_cycle, :] = sample - sample.mean(axis=0) - return anomaly - def anomaly(self): - """ - Return anomaly time series from observable. - - For further comments, see :meth:`_calculate_anomaly`. - - .. note:: - Only the currently selected spatio-temporal window is considered. - - **Example:** - - >>> r(ClimateData.SmallTestData().anomaly()[:,0]) - array([-0.5 , -0.321 , -0.1106, 0.1106, 0.321 , - 0.5 , 0.321 , 0.1106, -0.1106, -0.321 ]) - - :rtype: 2D Numpy array [time, node index] - :return: the anomalized time series. - """ - if not self._flag_anomaly: - self._anomaly = self._calculate_anomaly() - self._flag_anomaly = True - - return self._anomaly - def anomaly_selected_months(self, selected_months): """ Return anomaly time series from observable for selected months. - For further comments, see :meth:`_calculate_anomaly`. + For further comments, see :meth:`anomaly`. .. note:: Only the currently selected spatio-temporal window is considered. @@ -507,9 +437,8 @@ def set_window(self, window): :arg window: The spatio-temporal window to select a view on the data. """ Data.set_window(self, window) - - self._flag_phase_mean = False - self._flag_anomaly = False + # invalidate cache + self._mut_window += 1 def set_global_window(self): """ @@ -532,6 +461,5 @@ def set_global_window(self): array([ 0., 5., 10., 15., 20., 25.], dtype=float32) """ Data.set_global_window(self) - - self._flag_phase_mean = False - self._flag_anomaly = False + # invalidate cache + self._mut_window += 1 diff --git a/src/pyunicorn/climate/climate_network.py b/src/pyunicorn/climate/climate_network.py index 7650e92..e1b924b 100644 --- a/src/pyunicorn/climate/climate_network.py +++ b/src/pyunicorn/climate/climate_network.py @@ -16,25 +16,15 @@ Provides classes for generating and analyzing complex climate networks. 
""" -# -# Import essential packages -# +from typing import Tuple, Hashable, Callable -# Import NumPy for the array object and fast numerics import numpy as np - -# Import iGraph for high performance graph theory tools written in pure ANSI-C import igraph -# Import GeoNetwork and GeoGrid classes +from ..core.cache import Cached from ..core import GeoNetwork, GeoGrid -from ..core.network import cached_const -# -# Define class ClimateNetwork -# - class ClimateNetwork(GeoNetwork): """ @@ -50,9 +40,9 @@ class ClimateNetwork(GeoNetwork): # Definitions of internal methods # - def __init__(self, grid, similarity_measure, threshold=None, - link_density=None, non_local=False, directed=False, - node_weight_type="surface", silence_level=0): + def __init__(self, grid: GeoGrid, similarity_measure: np.ndarray, + threshold=None, link_density=None, non_local=False, + directed=False, node_weight_type="surface", silence_level=0): """ Initialize an instance of :class:`ClimateNetwork`. @@ -81,10 +71,17 @@ def __init__(self, grid, similarity_measure, threshold=None, :arg int silence_level: The inverse level of verbosity of the object. """ # Initialize - self.grid = grid + assert isinstance(grid, GeoGrid) + self.grid: GeoGrid = grid self.directed = directed self.silence_level = silence_level + # mutation count + if not hasattr(self, "_mut_clim"): + self._mut_clim: int = 0 + else: + self._mut_clim += 1 + # FIXME: Is taking the absolute value by default OK? self._similarity_measure = np.abs(similarity_measure.astype("float32")) self._non_local = non_local @@ -105,6 +102,12 @@ def __init__(self, grid, similarity_measure, threshold=None, node_weight_type=self.node_weight_type, silence_level=self.silence_level) + def __cache_state__(self) -> Tuple[Hashable, ...]: + return GeoNetwork.__cache_state__(self) + (self._mut_clim,) + + def __rec_cache_state__(self) -> Tuple[object, ...]: + return (self.grid,) + def __str__(self): """ Return a string representation of the ClimateNetwork object. @@ -126,26 +129,9 @@ def __str__(self): f'Threshold: {self.threshold()}\n' + f'Local connections filtered out: {self.non_local()}') - def clear_cache(self, irreversible=False): - """ - Clean up cache. - - If irreversible=True, the network cannot be recalculated using a - different threshold, or link density. - - :arg bool irreversible: The irreversibility of clearing the cache. - """ - GeoNetwork.clear_cache(self) - - if irreversible: - try: - del self._similarity_measure - except AttributeError: - pass - def _regenerate_network(self): """ - Regenerate the current climate network according to new similarity + Regenerate the current climate network according to a new similarity measure. """ ClimateNetwork.__init__(self, grid=self.data.grid, @@ -289,9 +275,8 @@ def Load(filename, fileformat=None, silence_level=0, *args, **kwds): # Overwrite igraph Graph object in Network instance to restore link # attributes/weights net.graph = graph - # Restore link attributes/weights - net.clear_paths_cache() - + # invalidate cache + net._mut_la += 1 return net # @@ -626,7 +611,7 @@ def set_link_density(self, link_density): threshold = self.threshold_from_link_density(link_density) self.set_threshold(threshold) - @cached_const('base', 'correlation_distance') + @Cached.method() def correlation_distance(self): """ Return correlation weighted distances between nodes. 
@@ -658,7 +643,7 @@ def correlation_distance(self): """ return self.similarity_measure() * self.grid.angular_distance() - @cached_const('base', 'inv_correlation_distance') + @Cached.method() def inv_correlation_distance(self): """ Return correlation weighted distances between nodes. @@ -719,3 +704,8 @@ def local_correlation_distance_weighted_vulnerability(self): """ self.inv_correlation_distance() return self.local_vulnerability('inv_correlation_distance') + + def _weighted_metric(self, attr: str, calc: Callable, metric: str): + if not self.find_link_attribute(attr): + self.set_link_attribute(attr, calc()) + return getattr(self, metric)(attr) diff --git a/src/pyunicorn/climate/coupled_climate_network.py b/src/pyunicorn/climate/coupled_climate_network.py index 6bae9fd..568d07e 100644 --- a/src/pyunicorn/climate/coupled_climate_network.py +++ b/src/pyunicorn/climate/coupled_climate_network.py @@ -16,26 +16,13 @@ Provides classes for generating and analyzing complex coupled climate networks. """ -# -# Import essential packages -# - -# Import NumPy for the array object and fast numerics import numpy as np -# Import climate_network for ClimateNetwork class -from .climate_network import ClimateNetwork - -# Import grid for GeoGrid class from ..core import InteractingNetworks, GeoNetwork, GeoGrid +from .climate_network import ClimateNetwork -# -# Define class CoupledClimateNetwork -# - class CoupledClimateNetwork(InteractingNetworks, ClimateNetwork): - """ Encapsulates a coupled similarity network embedded on a spherical surface. diff --git a/src/pyunicorn/climate/coupled_tsonis.py b/src/pyunicorn/climate/coupled_tsonis.py index 22d4e29..72525ac 100644 --- a/src/pyunicorn/climate/coupled_tsonis.py +++ b/src/pyunicorn/climate/coupled_tsonis.py @@ -16,21 +16,12 @@ Provides classes for generating and analyzing complex coupled climate networks. """ -# -# Import essential packages -# - -# Import NumPy for the array object and fast numerics import numpy as np -# Import climate_network for CoupledClimateNetwork class from .coupled_climate_network import CoupledClimateNetwork -# -# Define class CoupledClimateNetwork -# - +# pylint: disable=too-many-ancestors class CoupledTsonisClimateNetwork(CoupledClimateNetwork): """ diff --git a/src/pyunicorn/climate/eventseries_climatenetwork.py b/src/pyunicorn/climate/eventseries_climatenetwork.py index 8c5605b..05e78fa 100644 --- a/src/pyunicorn/climate/eventseries_climatenetwork.py +++ b/src/pyunicorn/climate/eventseries_climatenetwork.py @@ -161,9 +161,11 @@ def __init__(self, data, method='ES', p_value=None, **kwargs): # option measure_matrix = [] + # required here for bootstrapping the cache hierarchy + self.grid = None + # If standard ES/ECA measure is chosen, calculate pairwise ES/ECA # scores - if self.__method in ['ES', 'ECA']: measure_matrix = \ self.event_series_analysis(method=self.__method, diff --git a/src/pyunicorn/climate/havlin.py b/src/pyunicorn/climate/havlin.py index 8aec347..11855dd 100644 --- a/src/pyunicorn/climate/havlin.py +++ b/src/pyunicorn/climate/havlin.py @@ -16,24 +16,15 @@ Provides classes for generating and analyzing complex climate networks. 
""" -# -# Import essential packages -# +from typing import Tuple, Hashable -# Import NumPy for the array object and fast numerics import numpy as np - -# Import tqdm for easy progress bar handling from tqdm import trange -# Import cnNetwork for Network base class +from .climate_data import ClimateData from .climate_network import ClimateNetwork -# -# Define class HavlinClimateNetwork -# - class HavlinClimateNetwork(ClimateNetwork): """ @@ -90,11 +81,13 @@ def __init__(self, data, max_delay, threshold=None, link_density=None, # Set instance variables self._max_delay = 0 self._correlation_lag = None - self.data = data - """(ClimateData) - The climate data used for network construction.""" + assert isinstance(data, ClimateData) + self.data: ClimateData = data + """The climate data used for network construction.""" self.N = data.grid.N self._prescribed_link_density = link_density + self._mut_clim: int = 0 self._set_max_delay(max_delay) ClimateNetwork.__init__(self, grid=self.data.grid, similarity_measure=self._similarity_measure, @@ -105,6 +98,12 @@ def __init__(self, data, max_delay, threshold=None, link_density=None, node_weight_type=node_weight_type, silence_level=silence_level) + def __cache_state__(self) -> Tuple[Hashable, ...]: + return ClimateNetwork.__cache_state__(self) + + def __rec_cache_state__(self) -> Tuple[object, ...]: + return ClimateNetwork.__rec_cache_state__(self) + (self.data,) + def __str__(self): """ Return a string version of the instance of HavlinClimateNetwork. @@ -114,22 +113,14 @@ def __str__(self): Maximum delay used for correlation strength estimation: \ {self.get_max_delay()}' - def clear_cache(self, irreversible=False): + def clear_cache(self): """ Clean up cache. - - If irreversible=True, the network cannot be recalculated using a - different threshold, or link density. - - :arg bool irreversible: The irreversibility of clearing the cache. """ - ClimateNetwork.clear_cache(self, irreversible) - - if irreversible: - try: - del self._correlation_lag - except AttributeError: - pass + try: + del self._correlation_lag + except AttributeError: + pass # # Defines methods to calculate correlation strength and lags @@ -152,23 +143,12 @@ def _calculate_correlation_strength(self, anomaly, max_delay, gamma=0.2): :rtype: tuple of two 2D arrays [index, index] :return: the correlation strength and maximum lag matrices. """ - if self.silence_level <= 1: - print("Calculating correlation strength matrix " - "following [Yamasaki2008]_...") - - # Initialize N = self.N - - # Normalize anomaly time series to zero mean and unit variance self.data.normalize_time_series_array(anomaly) - - # Apply cosine window to anomaly data anomaly *= self.data.cos_window(anomaly, gamma) - # Zero pad windowed data to set the length of each time series to # a power of two anomaly = self.data.zero_pad_data(anomaly) - correlation_strength = np.empty((N, N)) max_lag_matrix = np.empty((N, N)) @@ -217,10 +197,8 @@ def _set_max_delay(self, max_delay): :arg int max_delay: The maximum delay for cross-correlation functions. """ - # Set class variable _max_delay + self._mut_clim += 1 self._max_delay = max_delay - - # Calculate correlation strength and lag results = self._calculate_correlation_strength(self.data.anomaly(), max_delay) self._similarity_measure = results[0] @@ -244,11 +222,7 @@ def correlation_strength(self): :rtype: 2D array [index, index] :return: the correlation strength matrix. 
""" - try: - return self._similarity_measure - except AttributeError as e: - raise AttributeError("Correlation strength matrix was deleted " - "earlier and cannot be retrieved.") from e + return self._similarity_measure def correlation_lag(self): """ @@ -257,11 +231,7 @@ def correlation_lag(self): :rtype: 2D array [index, index] :return: the lag at maximum cross-correlation matrix. """ - try: - return self._correlation_lag - except AttributeError as e: - raise AttributeError("Lag matrix was deleted " - "earlier and cannot be retrieved.") from e + return self._correlation_lag # # Methods to calculate weighted network measures @@ -273,11 +243,10 @@ def correlation_strength_weighted_average_path_length(self): :return float: the correlation strength weighted average path length. """ - if "correlation_strength" not in self._path_lengths_cached: - self.set_link_attribute("correlation_strength", - np.abs(self.correlation_strength())) - - return self.average_path_length("correlation_strength") + return self._weighted_metric( + "correlation_strength", + lambda: np.abs(self.correlation_strength()), + "average_path_length") def correlation_strength_weighted_closeness(self): """ @@ -286,11 +255,10 @@ def correlation_strength_weighted_closeness(self): :rtype: 1D array [index] :return: the correlation strength weighted closeness sequence. """ - if "correlation_strength" not in self._path_lengths_cached: - self.set_link_attribute("correlation_strength", - np.abs(self.correlation_strength())) - - return self.closeness("correlation_strength") + return self._weighted_metric( + "correlation_strength", + lambda: np.abs(self.correlation_strength()), + "closeness") def correlation_lag_weighted_average_path_length(self): """ @@ -298,11 +266,10 @@ def correlation_lag_weighted_average_path_length(self): :return float: the correlation lag weighted average path length. """ - if "correlation_lag" not in self._path_lengths_cached: - self.set_link_attribute("correlation_lag", - np.abs(self.correlation_lag())) - - return self.average_path_length("correlation_lag") + return self._weighted_metric( + "correlation_lag", + lambda: np.abs(self.correlation_lag()), + "average_path_length") def correlation_lag_weighted_closeness(self): """ @@ -311,11 +278,10 @@ def correlation_lag_weighted_closeness(self): :rtype: 1D array [index] :return: the correlation lag weighted closeness sequence. """ - if "correlation_lag" not in self._path_lengths_cached: - self.set_link_attribute("correlation_lag", - np.abs(self.correlation_lag())) - - return self.closeness("correlation_lag") + return self._weighted_metric( + "correlation_lag", + lambda: np.abs(self.correlation_lag()), + "closeness") def local_correlation_strength_weighted_vulnerability(self): """ @@ -324,11 +290,10 @@ def local_correlation_strength_weighted_vulnerability(self): :rtype: 1D array [index] :return: the correlation strength weighted vulnerability sequence. """ - if "correlation_strength" not in self._path_lengths_cached: - self.set_link_attribute("correlation_strength", - np.abs(self.correlation_strength())) - - return self.local_vulnerability("correlation_strength") + return self._weighted_metric( + "correlation_strength", + lambda: np.abs(self.correlation_strength()), + "local_vulnerability") def local_correlation_lag_weighted_vulnerability(self): """ @@ -337,8 +302,7 @@ def local_correlation_lag_weighted_vulnerability(self): :rtype: 1D array [index] :return: the correlation lag weighted vulnerability sequence. 
""" - if "correlation_lag" not in self._path_lengths_cached: - self.set_link_attribute("correlation_lag", - np.abs(self.correlation_lag())) - - return self.local_vulnerability("correlation_lag") + return self._weighted_metric( + "correlation_lag", + lambda: np.abs(self.correlation_lag()), + "local_vulnerability") diff --git a/src/pyunicorn/climate/hilbert.py b/src/pyunicorn/climate/hilbert.py index 3f72da7..03dcdcb 100644 --- a/src/pyunicorn/climate/hilbert.py +++ b/src/pyunicorn/climate/hilbert.py @@ -16,14 +16,9 @@ Provides classes for generating and analyzing complex climate networks. """ -# -# Import essential packages -# +from typing import Tuple, Hashable -# Import NumPy for the array object and fast numerics import numpy as np - -# Import scipy.signal for signal processing try: import scipy.signal except ImportError: @@ -31,7 +26,7 @@ "functionality in class HilbertClimateNetwork might not be " "available!") -# Import cnNetwork for Network base class +from .climate_data import ClimateData from .climate_network import ClimateNetwork @@ -92,8 +87,9 @@ def __init__(self, data, threshold=None, link_density=None, # Set instance variables self._coherence_phase = None - self.data = data - """(ClimateData) - The climate data used for network construction.""" + assert isinstance(data, ClimateData) + self.data: ClimateData = data + """The climate data used for network construction.""" self.N = data.grid.N self._threshold = threshold self._prescribed_link_density = link_density @@ -109,28 +105,26 @@ def __init__(self, data, threshold=None, link_density=None, silence_level=silence_level) self._set_directed(directed, calculate_coherence=False) + def __cache_state__(self) -> Tuple[Hashable, ...]: + return ClimateNetwork.__cache_state__(self) + + def __rec_cache_state__(self) -> Tuple[object, ...]: + return ClimateNetwork.__rec_cache_state__(self) + (self.data,) + def __str__(self): """ Return a string representation. """ return 'HilbertClimateNetwork:\n' + ClimateNetwork.__str__(self) - def clear_cache(self, irreversible=False): + def clear_cache(self): """ Clean up cache. - - If irreversible=True, the network cannot be recalculated using a - different threshold, or link density. - - :arg bool irreversible: The irreversibility of clearing the cache. """ - ClimateNetwork.clear_cache(self, irreversible) - - if irreversible: - try: - del self._coherence_phase - except AttributeError: - pass + try: + del self._coherence_phase + except AttributeError: + pass # # Defines methods to calculate Hilbert correlation measures diff --git a/src/pyunicorn/climate/mutual_info.py b/src/pyunicorn/climate/mutual_info.py index 781aa2b..380b461 100644 --- a/src/pyunicorn/climate/mutual_info.py +++ b/src/pyunicorn/climate/mutual_info.py @@ -16,26 +16,17 @@ Provides classes for generating and analyzing complex climate networks. """ -# -# Import essential packages -# +from typing import Tuple, Hashable -# array object and fast numerics import numpy as np from ..core._ext.types import to_cy, FIELD from ._ext.numerics import mutual_information - -# Import cnNetwork for Network base class +from .climate_data import ClimateData from .climate_network import ClimateNetwork -# -# Define class MutualInfoClimateNetwork -# - class MutualInfoClimateNetwork(ClimateNetwork): - """ Represents a mutual information climate network. 
@@ -87,8 +78,9 @@ def __init__(self, data, threshold=None, link_density=None, self.silence_level = silence_level # Set instance variables - self.data = data - """(ClimateData) - The climate data used for network construction.""" + assert isinstance(data, ClimateData) + self.data: ClimateData = data + """The climate data used for network construction.""" self.N = self.data.grid.N self._prescribed_link_density = link_density self._winter_only = winter_only @@ -108,6 +100,12 @@ def __init__(self, data, threshold=None, link_density=None, node_weight_type=node_weight_type, silence_level=silence_level) + def __cache_state__(self) -> Tuple[Hashable, ...]: + return ClimateNetwork.__cache_state__(self) + + def __rec_cache_state__(self) -> Tuple[object, ...]: + return ClimateNetwork.__rec_cache_state__(self) + (self.data,) + def __str__(self): """ Return a string representation of MutualInfoClimateNetwork. @@ -266,11 +264,10 @@ def mutual_information_weighted_average_path_length(self): :return float: the mutual information weighted average path length. """ - if "mutual_information" not in self._path_lengths_cached: - self.set_link_attribute("mutual_information", - abs(self.mutual_information())) - - return self.average_path_length("mutual_information") + return self._weighted_metric( + "mutual_information", + lambda: np.abs(self.mutual_information()), + "average_path_length") def mutual_information_weighted_closeness(self): """ @@ -279,11 +276,10 @@ def mutual_information_weighted_closeness(self): :rtype: 1D Numpy array [index] :return: the mutual information weighted closeness sequence. """ - if "mutual_information" not in self._path_lengths_cached: - self.set_link_attribute("mutual_information", - abs(self.mutual_information())) - - return self.closeness("mutual_information") + return self._weighted_metric( + "mutual_information", + lambda: np.abs(self.mutual_information()), + "closeness") def local_mutual_information_weighted_vulnerability(self): """ @@ -292,8 +288,7 @@ def local_mutual_information_weighted_vulnerability(self): :rtype: 1D Numpy array [index] :return: the mutual information weighted vulnerability sequence. """ - if "mutual_information" not in self._path_lengths_cached: - self.set_link_attribute("mutual_information", - abs(self.mutual_information())) - - return self.local_vulnerability("mutual_information") + return self._weighted_metric( + "mutual_information", + lambda: np.abs(self.mutual_information()), + "local_vulnerability") diff --git a/src/pyunicorn/climate/tsonis.py b/src/pyunicorn/climate/tsonis.py index a639885..df95c94 100644 --- a/src/pyunicorn/climate/tsonis.py +++ b/src/pyunicorn/climate/tsonis.py @@ -16,22 +16,13 @@ Provides classes for generating and analyzing complex climate networks. """ -# -# Import essential packages -# - -# Import NumPy for the array object and fast numerics import numpy as np from .climate_network import ClimateNetwork from .climate_data import ClimateData -from ..core.network import cached_const +from ..core.cache import Cached -# -# Define class TsonisClimateNetwork -# - # TODO: Reconsider storage of correlation matrix without taking absolute value. class TsonisClimateNetwork(ClimateNetwork): @@ -197,7 +188,7 @@ def calculate_similarity_measure(self, anomaly): """ return self._calculate_correlation(anomaly) - @cached_const('base', 'correlation') + @Cached.method() def correlation(self): """ Return the correlation matrix at zero lag. 
diff --git a/src/pyunicorn/core/__init__.py b/src/pyunicorn/core/__init__.py
index 6d224e1..a24ddd8 100644
--- a/src/pyunicorn/core/__init__.py
+++ b/src/pyunicorn/core/__init__.py
@@ -28,7 +28,7 @@
 # Import classes
 #
 
-from .network import Network, NetworkError, nz_coords, cached_const
+from .network import Network, NetworkError, nz_coords
 from .spatial_network import SpatialNetwork
 from .geo_network import GeoNetwork
 from .grid import Grid
diff --git a/src/pyunicorn/core/cache.py b/src/pyunicorn/core/cache.py
new file mode 100644
index 0000000..6904ad9
--- /dev/null
+++ b/src/pyunicorn/core/cache.py
@@ -0,0 +1,179 @@
+# This file is part of pyunicorn.
+# Copyright (C) 2008--2024 Jonathan F. Donges and pyunicorn authors
+# URL:
+# License: BSD (3-clause)
+#
+# Please acknowledge and cite the use of this software and its authors
+# when results are used in publications or published elsewhere.
+#
+# You can use the following reference:
+# J.F. Donges, J. Heitzig, B. Beronov, M. Wiedermann, J. Runge, Q.-Y. Feng,
+# L. Tupikina, V. Stolbova, R.V. Donner, N. Marwan, H.A. Dijkstra,
+# and J. Kurths, "Unified functional network and nonlinear time series analysis
+# for complex systems science: The pyunicorn package"
+
+"""
+Provides a mix-in class that manages an LRU cache of derived quantities
+at the instance level, with declared dependencies on mutable attributes.
+"""
+
+from abc import ABC, abstractmethod
+from functools import lru_cache, wraps
+from inspect import getmembers, ismethod
+from typing import Tuple, Hashable, Optional
+
+
+class Cached(ABC):
+    """
+    Mix-in class which manages an LRU cache for subclass methods that
+    constitute derived quantities, based on `@functools.lru_cache()`.
+    The cache is populated by calling decorated methods (with new arguments),
+    has a bounded number of slots per method, and can be cleared manually.
+
+    A subclass should:
+    - decorate derived quantity methods with `@Cached.method()`,
+    - provide a method `__cache_state__() -> Tuple[Hashable,...]`, which is
+      used by `Cached` to define the `__eq__()` and `__hash__()` methods
+      used by `@functools.lru_cache()`, and
+    - optionally provide a similar method to list owned `Cached` instances:
+      `__rec_cache_state__() -> Tuple[object,...]`.
+
+    Class attributes which affect subsequently *defined* subclasses:
+    - cache_enable: toggles caching globally
+    - lru_params: sets `@functools.lru_cache()` parameters
+
+    NOTE:
+    The intended behaviour is specified by `tests/test_core/test_cache.py`.
+    """
+    cache_enable = True
+    lru_params = {"maxsize": 32, "typed": True}
+
+    @abstractmethod
+    def __cache_state__(self) -> Tuple[Hashable, ...]:
+        """
+        Hashable tuple of mutable object attributes, which will determine the
+        instance identity for ALL cached method lookups in this class,
+        *in addition* to the built-in object `id()`. Returning an empty tuple
+        amounts to declaring the object immutable in general. Mutable
+        dependencies that are specific to a method should instead be declared
+        via `@Cached.method(attrs=(...))`.
+
+        NOTE:
+        A subclass is responsible for the consistency and cost of this state
+        descriptor. For example, hashing a large array attribute may be
+        circumvented by declaring it as a property, with a custom setter method
+        that increments a dedicated mutation counter.
+        """
+
+    def __rec_cache_state__(self) -> Tuple[object, ...]:
+        """
+        Similar to `__cache_state__()`, but lists attributes which are
+        themselves instances of `Cached`. Empty by default.
+ """ + return () + + def __eq__(self, other): + if self is not other: + return False + else: + s, t = (o.__cache_state__() for o in [self, other]) + S, T = (o.__rec_cache_state__() for o in [self, other]) + assert all(isinstance(o, tuple) for o in [s, t]) + assert all(isinstance(o, tuple) for o in [S, T]) + assert len(s) == len(t) and len(S) == len(T) + return s == t and all(s_ == t_ for s_, t_ in zip(S, T)) + + def __hash__(self): + s = self.__cache_state__() + S = self.__rec_cache_state__() + assert isinstance(s, tuple) and isinstance(S, tuple) + return hash(sum( + (((None,) if t is None else t.__cache_state__()) for t in S), + start=(id(self),) + s)) + + @classmethod + def method(cls, name: Optional[str] = None, + attrs: Optional[Tuple[str, ...]] = None): + """ + Caching decorator based on `@functools.lru_cache()`. + + Cache entries for decorated methods are indexed by the combination of: + - the object `id()`, + - the object-level mutable instance attributes as declared by the + subclass method `__cache_state__()`, + - the object-level mutable instance attributes as declared by the + optional subclass method `__rec_cache_state__()`, + - the method-level mutable instance attributes as declared by the + optional decorator argument `attrs`, and + - the argument pattern at the call site, including the ordering of + named arguments. + + The decorated method provides several attributes of its own, as defined + by `@functools.lru_cache()`, including: + - `cache_clear()`: delete this method cache for ALL `cls` instances + - `__wrapped__`: the original method + + :arg name: Optionally print a message at the first method invocation. + :arg attrs: Optionally declare attribute names as dependencies. + + NOTE: + The same reasoning about consistency and cost applies to the `attrs` + argument as to the `__cache_state__()` method. + """ + # evaluated at decorator instantiation + assert attrs is None or ( + isinstance(attrs, tuple) and len(attrs) > 0 + and all(isinstance(a, str) for a in attrs)) + assert name is None or isinstance(name, str) + + def wrapper(f): + # evaluated at decorator application (method definition) + if not cls.cache_enable: + return f + else: + def uncached(self, *args, **kwargs): + # evaluated at uncached method invocation + if ( + name is not None and + getattr(self, "silence_level", 0) <= 1): + print(f"Calculating {name}...") + return f(self, *args, **kwargs) + + if attrs is None: + # leave call signature unchanged + wrapped = lru_cache(**cls.lru_params)(uncached) + else: + # present `attrs` as arguments for cache lookup + @lru_cache(**cls.lru_params) + def cached(self, *args, **kwargs): + return uncached(self, *args[len(attrs):], **kwargs) + + @wraps(cached, assigned=( + 'cache_info', 'cache_clear', '__wrapped__')) + def wrapped(self, *args, **kwargs): + _attrs = (getattr(self, a) for a in attrs) + return cached(self, *_attrs, *args, **kwargs) + + # fully decorated method + return wraps(f)(wrapped) + return wrapper + + def cache_clear(self, prefix: Optional[str] = None): + """ + *Delete* all method caches for ALL instances of `self.__class__`, + and recursively for owned `Cached` instances. This is simply a loop + over the `cache_clear()` methods of all cached methods. + + :arg prefix: Optionally restrict the deleted caches by method name. + + NOTE: + Instead, *invalidating* method caches for a SINGLE instance is achieved + by modifying the relevant subset of mutable attributes, see + `@Cached.method()`. 
+ """ + for n, m in getmembers(self, predicate=ismethod): + if prefix is None or n.startswith(prefix): + if all(hasattr(m, p) for p in ["cache_clear", "__wrapped__"]): + m.cache_clear() + for c in self.__rec_cache_state__(): + c.cache_clear(prefix=prefix) diff --git a/src/pyunicorn/core/data.py b/src/pyunicorn/core/data.py index 421df85..8c86fd2 100644 --- a/src/pyunicorn/core/data.py +++ b/src/pyunicorn/core/data.py @@ -17,12 +17,9 @@ multivariate data and generating time series surrogates. """ -# -# Imports -# +from typing import Optional import numpy as np - try: from h5netcdf.legacyapi import Dataset except ImportError: @@ -32,14 +29,9 @@ print("pyunicorn: Packages netCDF4 or h5netcdf could not be loaded. " "Some functionality in class Data might not be available!") - from .geo_grid import GeoGrid -# -# Define class Data -# - class Data: """ @@ -55,8 +47,9 @@ class Data: # Define internal methods # - def __init__(self, observable, grid, observable_name=None, - observable_long_name=None, window=None, silence_level=0): + def __init__(self, observable: np.ndarray, grid: GeoGrid, + observable_name: str = None, observable_long_name: str = None, + window: Optional[dict] = None, silence_level: int = 0): """ Initialize an instance of Data. @@ -69,8 +62,8 @@ def __init__(self, observable, grid, observable_name=None, :arg observable: The array of time series to be represented by the :class:`Data` instance. :type grid: :class:`.GeoGrid` instance - :arg grid: The Grid representing the spatial coordinates associated to - the time series and their temporal sampling. + :arg grid: The GeoGrid representing the spatial coordinates associated + to the time series and their temporal sampling. :arg str observable_name: A short name for the observable. :arg str observable_long_name: A long name for the observable. :arg dict window: Spatio-temporal window to select a view on the data. @@ -80,6 +73,8 @@ def __init__(self, observable, grid, observable_name=None, self.silence_level = silence_level """(int) - The inverse level of verbosity of the object.""" self._full_observable = observable + + assert isinstance(grid, GeoGrid) self._full_grid = grid self.grid = None """The :class:`.GeoGrid` object associated with the data.""" @@ -129,16 +124,6 @@ def set_silence_level(self, silence_level): self.grid.silence_level = silence_level self._full_grid.silence_level = silence_level - def clear_cache(self): - """ - Clean up cache. - - Is reversible, since all cached information can be recalculated from - basic data. - """ - self.grid.clear_cache() - self._full_grid.clear_cache() - # # Methods for creating Data objects and alternative constructors # diff --git a/src/pyunicorn/core/geo_grid.py b/src/pyunicorn/core/geo_grid.py index 37f166b..ce797fb 100644 --- a/src/pyunicorn/core/geo_grid.py +++ b/src/pyunicorn/core/geo_grid.py @@ -16,14 +16,9 @@ Provides class for horizontal two-dimensional spatio-temporal grid. """ -# -# Import essential packages -# - +from typing import Tuple, Hashable -# array object and fast numerics import numpy as np - # Import package to calculate points inside a polygon try: from matplotlib import path @@ -31,21 +26,13 @@ print("An error occurred when importing matplotlib.path! 
" "Some functionality in GeoGrid class might not be available.") -# Cythonized functions from ._ext.types import to_cy, FIELD from ._ext.numerics import _calculate_angular_distance - -# Grid base class +from .cache import Cached from .grid import Grid -# -# Define class GeoGrid -# - - class GeoGrid(Grid): - """ Encapsulates a horizontal two-dimensional spatio-temporal grid on the sphere. @@ -58,7 +45,9 @@ class GeoGrid(Grid): # Definitions of internal methods # - def __init__(self, time_seq, lat_seq, lon_seq, silence_level=0): + def __init__(self, time_seq: np.ndarray, + lat_seq: np.ndarray, lon_seq: np.ndarray, + silence_level: int = 0): """ Initialize an instance of GeoGrid. @@ -77,9 +66,8 @@ def __init__(self, time_seq, lat_seq, lon_seq, silence_level=0): Grid.__init__(self, time_seq, np.vstack((lat_seq, lon_seq)), silence_level) - # Cache - self._angular_distance = None - self._angular_distance_cached = False + def __cache_state__(self) -> Tuple[Hashable, ...]: + return Grid.__cache_state__(self) def __str__(self): """ @@ -88,17 +76,6 @@ def __str__(self): return (f"GeoGrid: {self._grid_size['space']} grid points, " f"{self._grid_size['time']} timesteps.") - def clear_cache(self): - """ - Clean up cache. - - Is reversible, since all cached information can be recalculated from - basic data. - """ - if self._angular_distance_cached: - del self._angular_distance - self._angular_distance_cached = False - # # Functions for loading and saving the Grid object # @@ -412,38 +389,10 @@ def distance(self): """ return self.angular_distance() - def calculate_angular_distance(self): - """ - Calculate and return the angular great circle distance matrix. - - **No normalization applied anymore!** Return values are in the range - 0 to Pi. - - :rtype: 2D Numpy array [index, index] - :return: the angular great circle distance matrix (unit radians). - """ - if self.silence_level <= 1: - print("Calculating angular great circle distance using Cython...") - - # Get number of nodes - N = self.N - - # Get sequences of cosLat, sinLat, cosLon and sinLon for all nodes - cos_lat = to_cy(self.cos_lat(), FIELD) - sin_lat = to_cy(self.sin_lat(), FIELD) - cos_lon = to_cy(self.cos_lon(), FIELD) - sin_lon = to_cy(self.sin_lon(), FIELD) - - # Initialize cython cof of angular distance matrix - cosangdist = np.zeros((N, N), dtype=FIELD) - - _calculate_angular_distance(cos_lat, sin_lat, cos_lon, sin_lon, - cosangdist, N) - return np.arccos(cosangdist) - + @Cached.method(name="angular great circle distance") def angular_distance(self): """ - Return the angular great circle distance matrix. + Calculate the angular great circle distance matrix. **No normalization applied anymore!** Return values are in the range 0 to Pi. @@ -461,11 +410,18 @@ def angular_distance(self): :rtype: 2D Numpy array [index, index] :return: the angular great circle distance matrix. 
""" - if not self._angular_distance_cached: - self._angular_distance = self.calculate_angular_distance() - self._angular_distance_cached = True - - return self._angular_distance + # Get number of nodes + N = self.N + # Initialize cython cof of angular distance matrix + cosangdist = np.zeros((N, N), dtype=FIELD) + _calculate_angular_distance( + # Get sequences of cosLat, sinLat, cosLon and sinLon for all nodes + to_cy(self.cos_lat(), FIELD), + to_cy(self.sin_lat(), FIELD), + to_cy(self.cos_lon(), FIELD), + to_cy(self.sin_lon(), FIELD), + cosangdist, N) + return np.arccos(cosangdist) def boundaries(self): """ diff --git a/src/pyunicorn/core/geo_network.py b/src/pyunicorn/core/geo_network.py index 5358d2d..6c7e8a3 100644 --- a/src/pyunicorn/core/geo_network.py +++ b/src/pyunicorn/core/geo_network.py @@ -16,23 +16,17 @@ Provides class for analyzing complex network embedded on a spherical surface. """ -# array object and fast numerics +from typing import Tuple, Hashable + import numpy as np -# random number generation from numpy import random -# high performance graph theory tools written in pure ANSI-C import igraph from .spatial_network import SpatialNetwork from .geo_grid import GeoGrid -# -# Define class GeoNetwork -# - class GeoNetwork(SpatialNetwork): - """ Encapsulates a network embedded on a spherical surface. @@ -47,8 +41,8 @@ class GeoNetwork(SpatialNetwork): # Definitions of internal methods # - def __init__(self, grid, adjacency=None, edge_list=None, directed=False, - node_weight_type="surface", silence_level=0): + def __init__(self, grid: GeoGrid, adjacency=None, edge_list=None, + directed=False, node_weight_type="surface", silence_level=0): """ Initialize an instance of GeoNetwork. @@ -71,12 +65,9 @@ def __init__(self, grid, adjacency=None, edge_list=None, directed=False, - "surface" (cos lat) - "irrigation" (cosĀ² lat) """ - if grid.__class__.__name__ != "GeoGrid": - raise TypeError("GeoNetwork can only be created with GeoGrid!") - - self.grid = grid - """(Grid) - GeoGrid object describing the network's spatial - embedding""" + assert isinstance(grid, GeoGrid) + self.grid: GeoGrid = grid + """GeoGrid object describing the network's spatial embedding""" # Call constructor of parent class Network SpatialNetwork.__init__(self, grid=grid, adjacency=adjacency, @@ -91,6 +82,14 @@ def __init__(self, grid, adjacency=None, edge_list=None, directed=False, self.grid_neighbours = None self.grid_neighbours_set = None + def __cache_state__(self) -> Tuple[Hashable, ...]: + # The following attributes are assumed immutable: + # (grid) + return SpatialNetwork.__cache_state__(self) + + def __rec_cache_state__(self) -> Tuple[object, ...]: + return (self.grid,) + def __str__(self): """ Return a string representation of the GeoNetwork object. @@ -98,15 +97,6 @@ def __str__(self): return (f'GeoNetwork:\n{SpatialNetwork.__str__(self)}\n' f'Geographical boundaries:\n{self.grid.print_boundaries()}') - def clear_cache(self): - """ - Clean up cache. - - Is reversible, since all cached information can be recalculated from - basic data. - """ - SpatialNetwork.clear_cache(self) - def set_node_weight_type(self, node_weight_type): """ Set node weights for calculation of n.s.i. 
measures according to @@ -204,9 +194,8 @@ def Load(filename, fileformat=None, silence_level=0, *args, **kwds): # Overwrite igraph Graph object in Network instance to restore link # attributes/weights net.graph = graph - # Restore link attributes/weights - net.clear_paths_cache() - + # invalidate cache + net._mut_la += 1 return net def save_for_cgv(self, filename, fileformat="graphml"): diff --git a/src/pyunicorn/core/grid.py b/src/pyunicorn/core/grid.py index 4548084..536b1b9 100644 --- a/src/pyunicorn/core/grid.py +++ b/src/pyunicorn/core/grid.py @@ -16,28 +16,17 @@ Provides class for spatio-temporal grids. """ -# -# Import essential packages -# - -# Import pickle for loading and saving Python objects +from typing import Tuple, Hashable import pickle -# array object and fast numerics import numpy as np -# Cythonized functions +from .cache import Cached from ._ext.types import to_cy, FIELD from ._ext.numerics import _calculate_euclidean_distance -# -# Define class Grid -# - - -class Grid: - +class Grid(Cached): """ Encapsulates a spatio-temporal grid. @@ -49,15 +38,16 @@ class Grid: # Definitions of internal methods # - def __init__(self, time_seq, space_seq, silence_level=0): + def __init__(self, time_seq: np.ndarray, space_seq: np.ndarray, + silence_level: int = 0): """ Initialize an instance of Grid. :type time_seq: 1D Numpy array [time] :arg time_seq: The increasing sequence of temporal sampling points. - :type lat_seq: 2D Numpy array [dim, index] - :arg lat_seq: The sequences of spatial sampling points. + :type space_seq: 2D Numpy array [dim, index] + :arg space_seq: The sequences of spatial sampling points. :type silence_level: number (int) :arg silence_level: The inverse level of verbosity of the object. @@ -88,9 +78,10 @@ def __init__(self, time_seq, space_seq, silence_level=0): self.n_grid_points = self._grid_size["time"] * self.N """(number (int)) - The total number of data points / samples.""" - # Cache - self._euclidean_distance = None - self._euclidean_distance_cached = False + def __cache_state__(self) -> Tuple[Hashable, ...]: + # The following attributes are assumed immutable: + # (N, _grid) + return () def __str__(self): """ @@ -291,27 +282,7 @@ def distance(self): """ return self.euclidean_distance() - def calculate_euclidean_distance(self): - """ - Calculate and return the euclidean distance matrix. - - :rtype: 2D Numpy array [index, index] - :return: the euclidean distance matrix. - """ - # Get number of nodes - N_nodes = self.N - - # Get sequences of coordinates - sequences = to_cy(self._grid["space"], FIELD) - - # Get number of dimensions - N_dim = sequences.shape[0] - - distance = np.zeros((N_nodes, N_nodes), dtype=FIELD) - _calculate_euclidean_distance(sequences, distance, N_dim, N_nodes) - - return distance - + @Cached.method() def euclidean_distance(self): """ Return the euclidean distance matrix between grid points. @@ -329,11 +300,18 @@ def euclidean_distance(self): :rtype: 2D Numpy array [index, index] :return: the euclidean distance matrix. 
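+
+        Repeated calls return the cached matrix, e.g.:
+
+        >>> g = Grid.SmallTestGrid()
+        >>> g.euclidean_distance() is g.euclidean_distance()
+        True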
""" - if not self._euclidean_distance_cached: - self._euclidean_distance = self.calculate_euclidean_distance() - self._euclidean_distance_cached = True + # Get number of nodes + N_nodes = self.N - return self._euclidean_distance + # Get sequences of coordinates + sequences = to_cy(self._grid["space"], FIELD) + + # Get number of dimensions + N_dim = sequences.shape[0] + + distance = np.zeros((N_nodes, N_nodes), dtype=FIELD) + _calculate_euclidean_distance(sequences, distance, N_dim, N_nodes) + return distance def boundaries(self): """ diff --git a/src/pyunicorn/core/interacting_networks.py b/src/pyunicorn/core/interacting_networks.py index a5e82ba..cdd7840 100644 --- a/src/pyunicorn/core/interacting_networks.py +++ b/src/pyunicorn/core/interacting_networks.py @@ -17,7 +17,6 @@ multivariate data and generating time series surrogates. """ -# Import NumPy for the array object and fast numerics import numpy as np from numpy import random @@ -26,14 +25,9 @@ _cross_transitivity, _nsi_cross_transitivity, _cross_local_clustering, \ _nsi_cross_local_clustering -# Import Network base class from .network import Network, NetworkError -# -# Define class InteractingNetworks -# - class InteractingNetworks(Network): """ @@ -71,7 +65,6 @@ def __init__(self, adjacency, directed=False, node_weights=None, weight of node i. (Default: list of ones) :arg int silence_level: The inverse level of verbosity of the object. """ - # Call constructor of parent class Network Network.__init__(self, adjacency=adjacency, directed=directed, node_weights=node_weights, silence_level=silence_level) diff --git a/src/pyunicorn/core/network.py b/src/pyunicorn/core/network.py index 69f0941..c81aaa6 100644 --- a/src/pyunicorn/core/network.py +++ b/src/pyunicorn/core/network.py @@ -17,13 +17,10 @@ multivariate data and generating time series surrogates. """ -# -# Import essential packages -# - import sys # performance testing import time -from functools import partial, wraps +from functools import partial +from typing import Tuple, Hashable, Optional from multiprocessing import get_context, cpu_count import numpy as np # array object and fast numerics @@ -36,6 +33,7 @@ import igraph # high performance graph theory tools +from .cache import Cached from ..utils import mpi # parallelized computations from ._ext.types import \ @@ -47,6 +45,7 @@ # ============================================================================= +# Utilities def nz_coords(matrix): @@ -59,50 +58,6 @@ def nz_coords(matrix): return np.array(matrix.nonzero()).T -def cache_helper(self, cat, key, msg, func, *args, **kwargs): - """ - Cache result of a function in a subdict of :attr:`self.cache`. - - :arg str cat: cache category - :arg str key: cache key - :arg str msg: message to be displayed during first calculation - :arg func func: function to be cached - """ - # categories can be added on the fly?!?! - self.cache.setdefault(cat, {}) - - if self.cache[cat].setdefault(key) is None: - if msg is not None and self.silence_level <= 1: - print('Calculating ' + msg + '...') - self.cache[cat][key] = func(self, *args, **kwargs) - return self.cache[cat][key] - - -def cached_const(cat, key, msg=None): - """ - Cache result of decorated method in a fixed subdict of :attr:`self.cache`. 
- """ - def wrapper(func): - @wraps(func) - def wrapped(self, *args, **kwargs): - return cache_helper(self, cat, key, msg, func, *args, **kwargs) - return wrapped - return wrapper - - -def cached_var(cat, msg=None): - """ - Cache result of decorated method in a variable subdict of - :attr:`self.cache`, specified as first argument to the decorated method. - """ - def wrapper(func): - @wraps(func) - def wrapped(self, key=None, **kwargs): - return cache_helper(self, cat, key, msg, func, key, **kwargs) - return wrapped - return wrapper - - class NetworkError(Exception): """ Used for all exceptions raised by Network. @@ -116,6 +71,7 @@ def __str__(self): # ============================================================================= +# Doctest helpers def r(obj, decimals=4): @@ -150,11 +106,8 @@ def rr(obj, decimals=4): # ============================================================================= -# -# Define class Network -# -class Network: +class Network(Cached): """ A Network is a simple, undirected or directed graph with optional node and/or link weights. This class encapsulates data structures and methods to @@ -211,53 +164,58 @@ def __init__(self, adjacency=None, n_nodes=None, edge_list=None, :return: The new network. """ - self.directed = directed - """(bool) Indicates whether the network is directed.""" - self.silence_level = silence_level - """(int>=0) higher -> less progress info""" + self.directed: bool = directed + """indicates whether the network is directed""" + self.silence_level: int = silence_level + """higher -> less progress info""" - if n_nodes is None: - self.N = 0 - """(int>0) number of nodes""" - else: + self._mut_A: int = 0 + """mutation count tracking `self.adjcency`""" + self._mut_nw: int = 0 + """mutation count tracking `self.node_weights`""" + self._mut_la: int = 0 + """mutation count tracking `self.graph.es`""" + + self.N: int = 0 + """number of nodes""" + if n_nodes is not None: self.N = n_nodes - self.n_links = 0 - """(int>0) number of links""" - self.link_density = 0 - """(0 j. Symmetric if the - network is undirected.""" + network is undirected. + """ self.sp_dtype = None - self.graph = None - """(igraph.Graph) Embedded graph object providing some standard network - measures.""" + self.graph: igraph.Graph = None + """embedded graph object providing some standard network measures""" - self._node_weights = None - """(array([double>=0])) array of node weights""" - self.mean_node_weight = 0 + self._node_weights: Optional[np.ndarray] = None + self.mean_node_weight: float = 0 """mean node weight""" - self.total_node_weight = 0 + self.total_node_weight: float = 0 """total node weight""" - self.cache = {'base': {}, 'nsi': {}, 'paths': {}} - """(dict) cache of re-usable computation results""" - if adjacency is not None: - self._set_adjacency(adjacency) + self.adjacency = adjacency elif edge_list is not None: self.set_edge_list(edge_list, n_nodes) else: raise NetworkError("An adjacency matrix or edge list has to be " "given to initialize an instance of Network.") - self._set_node_weights(node_weights) + self.node_weights = node_weights self.degree() + def __cache_state__(self) -> Tuple[Hashable, ...]: + return (self.directed, self._mut_A,) + def __str__(self): """ Return a short summary of the network. @@ -287,29 +245,6 @@ def __len__(self): """ return self.N - def clear_cache(self): - """ - Clear cache of information that can be recalculated from basic data. 
- """ - self.cache['base'] = {} - self.clear_nsi_cache() - self.clear_paths_cache() - - def clear_nsi_cache(self): - """ - Clear cache of information that can be recalculated from basic data - and depends on the node weights. - """ - self.cache['nsi'] = {} - - def clear_paths_cache(self): - """ - Clear cache of path legths for link attributes. - """ - for attr in self.cache['paths']: - self.clear_link_attribute(attr) - self.cache['paths'] = {} - def copy(self): """ Return a copy of the network. @@ -440,7 +375,8 @@ def adjacency(self): """ return self.sp_A.A - def _set_adjacency(self, adjacency): + @adjacency.setter + def adjacency(self, adjacency): """ Set a new adjacency matrix. @@ -482,11 +418,9 @@ def _set_adjacency(self, adjacency): self.graph = igraph.Graph(n=N, edges=list(edges), directed=self.directed) self.graph.simplify() - Network.clear_cache(self) - @adjacency.setter - def adjacency(self, adjacency): - self._set_adjacency(adjacency) + # invalidate cache + self._mut_A += 1 def set_edge_list(self, edge_list, n_nodes=None): """ @@ -522,10 +456,11 @@ def set_edge_list(self, edge_list, n_nodes=None): @property def node_weights(self): - """(array([double>=0])) array of node weights""" + """array of node weights""" return self._node_weights - def _set_node_weights(self, weights): + @node_weights.setter + def node_weights(self, weights: Optional[np.ndarray]): """ Set the node weights to be used for node splitting invariant network measures. @@ -541,7 +476,6 @@ def _set_node_weights(self, weights): :arg weights: array-like [node] of weights (default: [1...1]) """ N = self.N - self.clear_nsi_cache() if weights is None: w = np.ones(N, dtype=DWEIGHT) @@ -554,9 +488,8 @@ def _set_node_weights(self, weights): self.mean_node_weight = w.mean() self.total_node_weight = w.sum() - @node_weights.setter - def node_weights(self, node_weights): - self._set_node_weights(node_weights) + # invalidate cache + self._mut_nw += 1 def sp_Aplus(self): """A^+ = A + Id. matrix used in n.s.i. 
measures""" @@ -706,8 +639,8 @@ def FromIGraph(graph, silence_level=0): # Overwrite igraph Graph object in Network instance to restore link # attributes/weights net.graph = graph - net.clear_paths_cache() - + # invalidate cache + net._mut_la += 1 return net @staticmethod @@ -848,7 +781,6 @@ def ErdosRenyi(n_nodes=100, link_probability=None, n_links=None, raise ValueError("`ErdosRenyi()` requires either a " "`link_probability` or `n_links` argument.") - # return adjacency matrix return np.array(graph.get_adjacency(type=2).data) @staticmethod @@ -888,7 +820,6 @@ def BarabasiAlbert_igraph(n_nodes=100, n_links_each=5, silence_level=0): # actual degree sequence of the generated graph, but just slightly graph.simplify() - # return adjacency matrix return np.array(graph.get_adjacency(type=2).data) # FIXME: Add example @@ -926,7 +857,6 @@ def BarabasiAlbert(n_nodes=100, n_links_each=5, silence_level=0): targets[n_targets + m: n_targets + 2*m] = j n_targets += 2*m - # Return adjacency matrix return A @staticmethod @@ -959,7 +889,6 @@ def Configuration(degrees, silence_level=0): # actual degree sequence of the generated graph, but just slightly graph.simplify() - # Return adjacency matrix return np.array(graph.get_adjacency(type=2).data) @staticmethod @@ -993,7 +922,6 @@ def WattsStrogatz(N, k, p): graph = igraph.Graph.Watts_Strogatz(dim=1, size=N, nei=k, p=p) - # Return adjacency matrix return np.array(graph.get_adjacency(type=2).data) @staticmethod @@ -1327,7 +1255,6 @@ def _inc_target(): total_inc_prob += inc_prob[this_N] + inc_prob[i] this_N += 1 pbar.update() - return w def randomly_rewire(self, iterations): @@ -1532,7 +1459,7 @@ def _cum_histogram(values, n_bins, interval=None): # Methods working with node attributes # - def set_node_attribute(self, attribute_name, values): + def set_node_attribute(self, attribute_name: str, values): """ Add a node attribute. @@ -1551,7 +1478,7 @@ def set_node_attribute(self, attribute_name, values): "has to have the same length as the number of nodes " "in the graph.") - def node_attribute(self, attribute_name): + def node_attribute(self, attribute_name: str): """ Return a node attribute. @@ -1564,19 +1491,23 @@ def node_attribute(self, attribute_name): """ return np.array(self.graph.vs.get_attribute_values(attribute_name)) - def del_node_attribute(self, attribute_name): + def del_node_attribute(self, attribute_name: str): """ Delete a node attribute. :arg str attribute_name: Name of node attribute to be deleted. """ - del self.graph.vs[attribute_name] + if attribute_name in self.graph.vs.attributes(): + del self.graph.vs[attribute_name] # # Methods working with link attributes # - def average_link_attribute(self, attribute_name): + def find_link_attribute(self, attribute_name: str): + return attribute_name in self.graph.es.attributes() + + def average_link_attribute(self, attribute_name: str): """ For each node, return the average of a link attribute over all links of that node. @@ -1587,7 +1518,7 @@ def average_link_attribute(self, attribute_name): """ return self.link_attribute(attribute_name).mean(axis=1) - def link_attribute(self, attribute_name): + def link_attribute(self, attribute_name: str): """ Return the values of a link attribute. @@ -1607,29 +1538,18 @@ def link_attribute(self, attribute_name): for e in self.graph.es: weights[e.tuple] = e[attribute_name] weights[e.tuple[1], e.tuple[0]] = e[attribute_name] - return weights - def clear_link_attribute(self, attribute_name): - """ - Clear cache of a link attribute. 
- - :arg str attribute_name: name of link attribute - """ - if attribute_name in self.cache['paths']: - del self.cache['paths'][attribute_name] - - def del_link_attribute(self, attribute_name): + def del_link_attribute(self, attribute_name: str): """ Delete a link attribute. :arg str attribute_name: name of link attribute to be deleted """ - if attribute_name in self.cache['paths']: - self.clear_link_attribute(attribute_name) + if self.find_link_attribute(attribute_name): del self.graph.es[attribute_name] - else: - print("WARNING: Link attribute", attribute_name, "not found!") + # invalidate cache + self._mut_la += 1 def set_link_attribute(self, attribute_name, values): """ @@ -1648,16 +1568,14 @@ def set_link_attribute(self, attribute_name, values): """ for e in self.graph.es: e[attribute_name] = values[e.tuple] - - # Set Network specific attributes - self.clear_link_attribute(attribute_name) + # invalidate cache + self._mut_la += 1 # # Degree related measures # - # @cached_const('base', 'degree') - @cached_var('degree') + @Cached.method() def degree(self, key=None): """ Return list of degrees. @@ -1677,7 +1595,7 @@ def degree(self, key=None): else: return self.outdegree(key) - @cached_var('indegree') + @Cached.method(attrs=("_mut_la",)) def indegree(self, key=None): """ Return list of in-degrees. @@ -1698,7 +1616,7 @@ def indegree(self, key=None): else: return self.link_attribute(key).sum(axis=0).T - @cached_var('outdegree') + @Cached.method(attrs=("_mut_la",)) def outdegree(self, key=None): """ Return list of out-degrees. @@ -1719,7 +1637,7 @@ def outdegree(self, key=None): else: return self.link_attribute(key).sum(axis=1).T - @cached_var('bildegree') + @Cached.method(attrs=("_mut_la",)) def bildegree(self, key=None): """ Return list of bilateral degrees, i.e. the number of simultaneously in- @@ -1752,7 +1670,7 @@ def sp_nsi_diag_k_inv(self): return sp.diags([np.power(self.nsi_degree(), -1)], [0], shape=(self.N, self.N), format='csc') -# @cached_var('nsi_degree') + @Cached.method(attrs=("_mut_nw", "_mut_la")) def nsi_degree(self, key=None, typical_weight=None): """ For each node, return its uncorrected or corrected n.s.i. degree. @@ -1805,7 +1723,7 @@ def nsi_degree(self, key=None, typical_weight=None): else: return res/typical_weight - 1.0 -# @cached_var('nsi_indegree') + @Cached.method(attrs=("_mut_nw", "_mut_la")) def nsi_indegree(self, key=None, typical_weight=None): """ For each node, return its n.s.i. indegree. @@ -1846,7 +1764,7 @@ def nsi_indegree(self, key=None, typical_weight=None): else: return res/typical_weight - 1.0 -# @cached_var('nsi_outdegree') + @Cached.method(attrs=("_mut_nw", "_mut_la")) def nsi_outdegree(self, key=None, typical_weight=None): """ For each node, return its n.s.i. outdegree. @@ -1887,7 +1805,7 @@ def nsi_outdegree(self, key=None, typical_weight=None): else: return res/typical_weight - 1.0 -# @cached_var('nsi_bildegree') + @Cached.method(attrs=("_mut_nw",)) def nsi_bildegree(self, key=None, typical_weight=None): """ For each node, return its n.s.i. bilateral degree. @@ -1911,7 +1829,7 @@ def nsi_bildegree(self, key=None, typical_weight=None): else: return res/typical_weight - 1.0 - @cached_const('base', 'degree df', 'the degree frequency distribution') + @Cached.method(name="the degree frequency distribution") def degree_distribution(self): """ Return the degree frequency distribution. 
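
The hunks above and below swap the bespoke `@cached_const`/`@cached_var` decorators for `@Cached.method()`, built on `@functools.lru_cache()`. A minimal sketch of the keying idea, assuming entries are keyed on the instance, its `__cache_state__()`, the mutation counters listed in `attrs`, and the hashable call arguments; the `name` argument, which supplies the text for progress messages, is ignored here, and all other details are hypothetical rather than copied from `src/pyunicorn/core/cache.py`:

    from functools import lru_cache, wraps

    class Cached:
        def __cache_state__(self):
            raise NotImplementedError

        @staticmethod
        def method(name=None, attrs=()):
            def decorator(func):
                @lru_cache(maxsize=None)
                def worker(instance, state, args):
                    # `state` does no work in the body; it only widens
                    # the cache key
                    return func(instance, *args)

                @wraps(func)
                def wrapper(self, *args):
                    state = (self.__cache_state__()
                             + tuple(getattr(self, a) for a in attrs))
                    return worker(self, state, args)
                return wrapper
            return decorator

Bumping a counter such as `_mut_A` or `_mut_la` changes `state`, so the next call misses the cache and recomputes instead of returning a stale result.
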
@@ -1928,7 +1846,7 @@ def degree_distribution(self): k = self.degree() return self._histogram(values=k, n_bins=k.max())[0] - @cached_const('base', 'indegree df', 'in-degree frequency distribution') + @Cached.method(name="the in-degree frequency distribution") def indegree_distribution(self): """ Return the in-degree frequency distribution. @@ -1945,7 +1863,7 @@ def indegree_distribution(self): ki = self.indegree() return self._histogram(values=ki, n_bins=ki.max())[0] - @cached_const('base', 'outdegree df', 'out-degree frequency distribution') + @Cached.method(name="the out-degree frequency distribution") def outdegree_distribution(self): """ Return the out-degree frequency distribution. @@ -1962,7 +1880,7 @@ def outdegree_distribution(self): ko = self.outdegree() return self._histogram(values=ko, n_bins=ko.max()+1)[0] - @cached_const('base', 'degree cdf', 'the cumulative degree distribution') + @Cached.method(name="the cumulative degree distribution") def degree_cdf(self): """ Return the cumulative degree frequency distribution. @@ -1979,8 +1897,7 @@ def degree_cdf(self): k = self.degree() return self._cum_histogram(values=k, n_bins=k.max())[0] - @cached_const('base', 'indegree cdf', - 'the cumulative in-degree distribution') + @Cached.method(name="the cumulative in-degree distribution") def indegree_cdf(self): """ Return the cumulative in-degree frequency distribution. @@ -1997,8 +1914,7 @@ def indegree_cdf(self): ki = self.indegree() return self._cum_histogram(values=ki, n_bins=ki.max() + 1)[0] - @cached_const('base', 'outdegree cdf', - 'the cumulative out-degree distribution') + @Cached.method(name="the cumulative out-degree distribution") def outdegree_cdf(self): """ Return the cumulative out-degree frequency distribution. @@ -2015,8 +1931,8 @@ def outdegree_cdf(self): ko = self.outdegree() return self._cum_histogram(values=ko, n_bins=ko.max() + 1)[0] - # FIXME: should rather return the weighted distribution! -# @cached_const('nsi', 'degree hist', 'a n.s.i. degree frequency histogram') + @Cached.method(name="a n.s.i. degree frequency histogram", + attrs=("_mut_nw",)) def nsi_degree_histogram(self, typical_weight=None): """ Return a frequency (!) histogram of n.s.i. degree. @@ -2041,9 +1957,8 @@ def nsi_degree_histogram(self, typical_weight=None): return self._histogram(values=nsi_k, n_bins=int(nsi_k.max()/nsi_k.min()) + 1) - # FIXME: should rather return the weighted distribution! -# @cached_const('nsi', 'degree hist', -# 'a cumulative n.s.i. degree frequency histogram') + @Cached.method(name="a cumulative n.s.i. degree frequency histogram", + attrs=("_mut_nw",)) def nsi_degree_cumulative_histogram(self, typical_weight=None): """ Return a cumulative frequency (!) histogram of n.s.i. degree. @@ -2067,7 +1982,7 @@ def nsi_degree_cumulative_histogram(self, typical_weight=None): return self._cum_histogram(values=nsi_k, n_bins=int(nsi_k.max()/nsi_k.min()) + 1) - @cached_const('base', 'avg nbr degree', "average neighbours' degrees") + @Cached.method(name="average neighbours' degrees") def average_neighbors_degree(self): """ For each node, return the average degree of its neighbors. @@ -2085,7 +2000,7 @@ def average_neighbors_degree(self): k = self.degree() * 1.0 return self.undirected_adjacency() * k / k[k != 0] - @cached_const('base', 'max nbr degree', "maximum neighbours' degree") + @Cached.method(name="maximum neighbours' degrees") def max_neighbors_degree(self): """ For each node, return the maximal degree of its neighbors. 
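
For the n.s.i. variants below, `attrs=("_mut_nw",)` additionally ties cached results to the node-weight mutation counter, since these measures depend on `node_weights`. A hypothetical call sequence (the adjacency matrix `A` and weight vector `w` are assumed to be defined):

    net = Network(adjacency=A)
    d1 = net.nsi_average_neighbors_degree()  # computed and cached
    d2 = net.nsi_average_neighbors_degree()  # cache hit, same key
    net.node_weights = w                     # setter bumps `_mut_nw`
    d3 = net.nsi_average_neighbors_degree()  # key changed -> recomputed
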
@@ -2103,7 +2018,8 @@ def max_neighbors_degree(self): nbks = self.undirected_adjacency().multiply(self.degree()) return nbks.max(axis=1).T.A.squeeze() - @cached_const('nsi', 'avg nbr degree', "n.s.i. average neighbours' degree") + @Cached.method(name="n.s.i. average neighbours' degrees", + attrs=("_mut_nw",)) def nsi_average_neighbors_degree(self): """ For each node, return the average n.s.i. degree of its neighbors. @@ -2142,7 +2058,8 @@ def nsi_average_neighbors_degree(self): return self.sp_Aplus() * (self.sp_diag_w() * nsi_k) / nsi_k # TODO: enable correction by typical_weight - @cached_const('nsi', 'max nbr degree', "n.s.i. maximum neighbour degree") + @Cached.method(name="n.s.i. maximum neighbours' degrees", + attrs=("_mut_nw",)) def nsi_max_neighbors_degree(self): """ For each node, return the maximal n.s.i. degree of its neighbors. @@ -2176,7 +2093,7 @@ def nsi_max_neighbors_degree(self): # Measures of clustering, transitivity and cliquishness # - @cached_const('base', 'local clustering', 'local clustering coefficients') + @Cached.method(name="local clustering coefficients") def local_clustering(self): """ For each node, return its (Watts-Strogatz) clustering coefficient. @@ -2198,8 +2115,7 @@ def local_clustering(self): C[np.isnan(C)] = 0 return C - @cached_const('base', 'global clustering', - 'global clustering coefficient (C_2)') + @Cached.method(name="the global clustering coefficient (C_2)") def global_clustering(self): """ Return the global (Watts-Strogatz) clustering coefficient. @@ -2259,14 +2175,14 @@ def _motif_clustering_helper(self, t_func, T, key=None, nsi=False, return ((numerator/typical_weight**2 - 3.0*bilk - 1.0) / (T - ksum/typical_weight - bilk + 2)) - @cached_var('local cyclemotif', 'local cycle motif clustering coefficient') + @Cached.method(name="the local cycle motif clustering coefficients") def local_cyclemotif_clustering(self, key=None): """ For each node, return the clustering coefficient with respect to the cycle motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. **Example:** @@ -2282,14 +2198,14 @@ def t_func(x, xT): T = self.indegree() * self.outdegree() - self.bildegree() return self._motif_clustering_helper(t_func, T, key=key) - @cached_var('local midmotif', 'local mid. motif clustering coefficient') + @Cached.method(name="the local mid. motif clustering coefficients") def local_midmotif_clustering(self, key=None): """ For each node, return the clustering coefficient with respect to the mid. motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. **Example:** @@ -2305,14 +2221,14 @@ def t_func(x, xT): T = self.indegree() * self.outdegree() - self.bildegree() return self._motif_clustering_helper(t_func, T, key=key) - @cached_var('local inmotif', 'local in motif clustering coefficient') + @Cached.method(name="the local in motif clustering coefficients") def local_inmotif_clustering(self, key=None): """ For each node, return the clustering coefficient with respect to the in motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. 
**Example:** @@ -2328,14 +2244,14 @@ def t_func(x, xT): T = self.indegree() * (self.indegree() - 1) return self._motif_clustering_helper(t_func, T, key=key) - @cached_var('local outmotif', 'local out motif clustering coefficient') + @Cached.method(name="the local out motif clustering coefficients") def local_outmotif_clustering(self, key=None): """ For each node, return the clustering coefficient with respect to the out motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. **Example:** @@ -2351,15 +2267,15 @@ def t_func(x, xT): T = self.outdegree() * (self.outdegree() - 1) return self._motif_clustering_helper(t_func, T, key=key) -# @cached_var('nsi local cyclemotif', -# 'local nsi cycle motif clustering coefficient') + @Cached.method(name="the local n.s.i. cycle motif clustering coefficients", + attrs=("_mut_nw",)) def nsi_local_cyclemotif_clustering(self, key=None, typical_weight=None): """ For each node, return the nsi clustering coefficient with respect to the cycle motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. Reference: [Zemp2014]_ @@ -2399,15 +2315,15 @@ def t_func(x, xT): t_func, T, key=key, nsi=True, typical_weight=typical_weight, ksum=ksum) -# @cached_var('nsi local midemotif', -# 'local nsi mid. motif clustering coefficient') + @Cached.method(name="the local n.s.i. mid. motif clustering coefficients", + attrs=("_mut_nw",)) def nsi_local_midmotif_clustering(self, key=None, typical_weight=None): """ For each node, return the nsi clustering coefficient with respect to the mid motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. Reference: [Zemp2014]_ @@ -2447,15 +2363,15 @@ def t_func(x, xT): t_func, T, key=key, nsi=True, typical_weight=typical_weight, ksum=ksum) -# @cached_var('nsi local inemotif', -# 'local nsi in motif clustering coefficient') + @Cached.method(name="the local n.s.i. in motif clustering coefficients", + attrs=("_mut_nw",)) def nsi_local_inmotif_clustering(self, key=None, typical_weight=None): """ For each node, return the nsi clustering coefficient with respect to the in motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. Reference: [Zemp2014]_ @@ -2495,15 +2411,15 @@ def t_func(x, xT): t_func, T, key=key, nsi=True, typical_weight=typical_weight, ksum=ksum) -# @cached_var('nsi local outemotif', -# 'local nsi out motif clustering coefficient') + @Cached.method(name="the local n.s.i. out motif clustering coefficients", + attrs=("_mut_nw",)) def nsi_local_outmotif_clustering(self, key=None, typical_weight=None): """ For each node, return the nsi clustering coefficient with respect to the out motif. If a link attribute key is specified, return the associated link - weighted version + weighted version. Reference: [Zemp2014]_ @@ -2542,7 +2458,7 @@ def t_func(x, xT): t_func, T, key=key, nsi=True, typical_weight=typical_weight, ksum=ksum) - @cached_const('base', 'transitivity', 'transitivity coefficient (C_1)') + @Cached.method(name="transitivity coefficient (C_1)") def transitivity(self): """ Return the transitivity (coefficient). @@ -2766,7 +2682,7 @@ def assortativity(self): num2 = (num2 / (2 * m)) ** 2 return (num1 - num2) / (den1 - num2) -# @cached_var('nsi', 'local clustering') + @Cached.method(name="n.s.i. 
local clustering", attrs=("_mut_nw",)) def nsi_local_clustering(self, typical_weight=None): """ For each node, return its uncorrected (between 0 and 1) or corrected @@ -2823,8 +2739,8 @@ def nsi_local_clustering(self, typical_weight=None): numerator = (Ap_Dw * Ap_Dw * Ap).diagonal() return (numerator/typical_weight**2 - 3.0*k - 1.0) / (k * (k-1.0)) - @cached_const('nsi', 'global clustering', - 'n.s.i. global topological clustering coefficient') + @Cached.method(name="the n.s.i. global topological clustering coefficient", + attrs=("_mut_nw",)) def nsi_global_clustering(self): """ Return the n.s.i. global clustering coefficient. @@ -2853,7 +2769,7 @@ def nsi_global_clustering(self): return (self.nsi_local_clustering().dot(self.node_weights) / self.total_node_weight) - @cached_const('nsi', 'transitivity', 'n.s.i. transitivity') + @Cached.method(name="n.s.i. transitivity", attrs=("_mut_nw",)) def nsi_transitivity(self): """ Return the n.s.i. transitivity. @@ -2873,8 +2789,8 @@ def nsi_transitivity(self): return num / denum - @cached_const('nsi', 'soffer clustering', - 'n.s.i. local Soffer clustering coefficients') + @Cached.method(name="the n.s.i. local Soffer clustering coefficients", + attrs=("_mut_nw",)) def nsi_local_soffer_clustering(self): """ For each node, return its n.s.i. clustering coefficient @@ -2921,7 +2837,7 @@ def nsi_local_soffer_clustering(self): # Measure path lengths # - @cached_var('paths') + @Cached.method(name="path lengths") def path_lengths(self, link_attribute=None): """ For each pair of nodes i,j, return the (weighted) shortest path length @@ -3014,8 +2930,8 @@ def average_path_length(self, link_attribute=None): return average_path_length - @cached_const('nsi', 'avg path length', - 'n.s.i. average shortest path length') + @Cached.method(name="the n.s.i. average shortest path length", + attrs=("_mut_nw",)) def nsi_average_path_length(self): """ Return the n.s.i. average shortest path length between all pairs of @@ -3083,7 +2999,7 @@ def diameter(self, directed=True, only_connected=True): # Link valued measures # - @cached_const('base', 'matching idx', 'matching index matrix') + @Cached.method(name="matching index matrix") def matching_index(self): """ For each pair of nodes, return their matching index. @@ -3108,7 +3024,7 @@ def matching_index(self): kk = np.repeat([self.degree()], self.N, axis=0) return commons / (kk + kk.T - commons) - @cached_const('base', 'link btw', 'link betweenness') + @Cached.method(name="link betweenness") def link_betweenness(self): """ For each link, return its betweenness. @@ -3175,7 +3091,7 @@ def edge_betweenness(self): # Node valued centrality measures # - @cached_const('base', 'btw', 'node betweenness') + @Cached.method(name="node betweenness") def betweenness(self): """ For each node, return its betweenness. @@ -3199,7 +3115,6 @@ def betweenness(self): return np.abs(np.array(self.graph.betweenness())) - # @cached_const('base', 'inter btw', 'interregional betweenness') def interregional_betweenness(self, sources=None, targets=None): """ For each node, return its interregional betweenness for given sets @@ -3234,9 +3149,8 @@ def interregional_betweenness(self, sources=None, targets=None): :rtype: 1d numpy array [node] of floats """ return self.nsi_betweenness(sources=sources, targets=targets, - aw=0, silent=1) + nsi=False) - # @cached_const('nsi', 'inter btw', 'n.s.i. interregional betweenness') def nsi_interregional_betweenness(self, sources, targets): """ For each node, return its n.s.i. 
interregional betweenness for given @@ -3262,9 +3176,10 @@ def nsi_interregional_betweenness(self, sources, targets): :rtype: 1d numpy array [node] of floats """ - return self.nsi_betweenness(sources=sources, targets=targets, silent=1) + return self.nsi_betweenness(sources=sources, targets=targets) - def nsi_betweenness(self, parallelize: bool = False, **kwargs): + def nsi_betweenness(self, sources=None, targets=None, + nsi: bool = True, parallelize: bool = False): """ For each node, return its n.s.i. betweenness. [Newman2001]_ @@ -3294,26 +3209,33 @@ def nsi_betweenness(self, parallelize: bool = False, **kwargs): :arg bool parallelize: Toggle multiprocessing :rtype: 1d numpy array [node] of floats """ - if self.silence_level <= 1: - if "silent" not in kwargs: - print("Calculating n.s.i. betweenness...") - - w = self.node_weights - if "aw" in kwargs: - if kwargs["aw"] == 0: - w = np.ones_like(w) - N, k = self.N, self.outdegree() - # initialize node lists - is_source = np.zeros(N, dtype=MASK) - if "sources" in kwargs and kwargs["sources"] is not None: - is_source[kwargs["sources"]] = 1 + is_source = np.zeros(self.N, dtype=MASK) + if sources is not None: + is_source[sources] = 1 else: - is_source[range(0, N)] = 1 - if "targets" in kwargs and kwargs["targets"] is not None: - targets = np.array(list(map(int, kwargs["targets"]))) + is_source[range(0, self.N)] = 1 + if targets is not None: + targets = np.array(list(map(int, targets))) else: - targets = np.arange(0, N) + targets = np.arange(0, self.N) + + # call cached worker method with hashable arguments + return self._nsi_betweenness( + tuple(is_source), tuple(targets), nsi, parallelize) + + @Cached.method(name="n.s.i. betweenness", attrs=("_mut_nw",)) + def _nsi_betweenness(self, is_source: Tuple[MASK], targets: Tuple[NODE], + nsi: bool, parallelize: bool): + # type cast inputs + assert all(isinstance(arg, tuple) for arg in [is_source, targets]) + is_source = np.array(is_source, dtype=MASK) + targets = np.array(targets, dtype=NODE) + k = to_cy(self.outdegree(), DEGREE) + + # initialize node weights + w = to_cy(self.node_weights, DWEIGHT) + w = w if nsi else np.ones_like(w) # sort links by node indices (contains each link twice!) links = nz_coords(self.sp_A) @@ -3321,22 +3243,23 @@ def nsi_betweenness(self, parallelize: bool = False, **kwargs): # neighbours of each node flat_neighbors = to_cy(np.array(links)[:, 1], NODE) assert k.sum() == len(flat_neighbors) == 2 * self.n_links - w, k = to_cy(w, DWEIGHT), to_cy(k, DEGREE) - worker = partial(_nsi_betweenness, N, w, k, flat_neighbors, is_source) + # call Cython implementation + worker = partial(_nsi_betweenness, + self.N, w, k, flat_neighbors, is_source) if parallelize: # (naively) parallelize loop over nodes n_workers = cpu_count() - batches = np.array_split(to_cy(targets, NODE), n_workers) + batches = np.array_split(targets, n_workers) with get_context("spawn").Pool() as pool: betw_w = np.sum(pool.map(worker, batches), axis=0) pool.close() pool.join() else: - betw_w = worker(to_cy(targets, NODE)) + betw_w = worker(targets) return betw_w / w - @cached_const('base', 'ev centrality', 'eigenvector centrality') + @Cached.method(name="eigenvector centrality") def eigenvector_centrality(self): """ For each node, return its eigenvector centrality. @@ -3360,7 +3283,7 @@ def eigenvector_centrality(self): ec *= np.sign(ec[0]) return ec / ec.max() - @cached_const('nsi', 'ev centrality', 'n.s.i. eigenvector centrality') + @Cached.method(name="n.s.i. 
eigenvector centrality", attrs=("_mut_nw",)) def nsi_eigenvector_centrality(self): """ For each node, return its n.s.i. eigenvector centrality. @@ -3490,7 +3413,7 @@ def closeness(self, link_attribute=None): return CC - @cached_const('nsi', 'closeness', 'n.s.i. closeness') + @Cached.method(name="n.s.i. closeness", attrs=("_mut_nw",)) def nsi_closeness(self): """ For each node, return its n.s.i. closeness. @@ -3527,7 +3450,7 @@ def nsi_closeness(self): return (self.total_node_weight / np.dot(nsi_distances, self.node_weights)) - @cached_const('nsi', 'harm closeness', 'n.s.i. harmonic closeness') + @Cached.method(name="n.s.i. harmonic closeness", attrs=("_mut_nw",)) def nsi_harmonic_closeness(self): """ For each node, return its n.s.i. harmonic closeness. @@ -3555,8 +3478,8 @@ def nsi_harmonic_closeness(self): return (np.dot(1.0 / nsi_distances, self.node_weights) / self.total_node_weight) - @cached_const('nsi', 'exp closeness', - 'n.s.i. exponential closeness centrality') + @Cached.method(name="n.s.i. exponential closeness centrality", + attrs=("_mut_nw",)) def nsi_exponential_closeness(self): """ For each node, return its n.s.i. exponential harmonic closeness. @@ -3584,7 +3507,7 @@ def nsi_exponential_closeness(self): return (np.dot(2.0**(-nsi_distances), self.node_weights) / self.total_node_weight) - @cached_const('base', 'arenas btw', 'Arenas-type random walk betweenness') + @Cached.method(name="Arenas-type random walk betweenness") def arenas_betweenness(self): """ For each node, return its Arenas-type random walk betweenness. @@ -3913,7 +3836,7 @@ def nsi_arenas_betweenness(self, exclude_neighbors=True, return nsi_arenas_betweenness - @cached_const('base', 'newman btw', "Newman's random walk betweenness") + @Cached.method(name="Newman's random walk betweenness") def newman_betweenness(self): """ For each node, return Newman's random walk betweenness. @@ -4249,7 +4172,7 @@ def global_efficiency(self, link_attribute=None): return efficiency - @cached_const('nsi', 'global eff', 'n.s.i. global efficiency') + @Cached.method(name="n.s.i. global efficiency", attrs=("_mut_nw",)) def nsi_global_efficiency(self): """ Return the n.s.i. global efficiency. @@ -4390,7 +4313,7 @@ def local_vulnerability(self, link_attribute=None): # Community measures # - @cached_const('base', 'coreness', 'coreness') + @Cached.method(name="coreness") def coreness(self): """ For each node, return its coreness. @@ -4414,8 +4337,7 @@ def coreness(self): # Synchronizability measures # - @cached_const('base', 'msf sync', - 'master stability function synchronizability') + @Cached.method(name="the master stability function synchronizability") def msf_synchronizability(self): """ Return the synchronizability in the master stability function @@ -4424,8 +4346,8 @@ def msf_synchronizability(self): This is equal to the largest eigenvalue of the graph Laplacian divided by the smallest non-zero eigenvalue. A smaller value indicates higher synchronizability and vice versa. This function makes sense for - undirected climate networks (with symmetric laplacian matrix). - For directed networks, the undirected laplacian matrix is used. + undirected climate networks (with symmetric Laplacian matrix). + For directed networks, the undirected Laplacian matrix is used. 
(see [Pecora1998]_) diff --git a/src/pyunicorn/core/spatial_network.py b/src/pyunicorn/core/spatial_network.py index ca8fc6f..3dfb332 100644 --- a/src/pyunicorn/core/spatial_network.py +++ b/src/pyunicorn/core/spatial_network.py @@ -16,26 +16,20 @@ Provides class for analyzing spatially embedded complex networks. """ -# array object and fast numerics +from typing import Tuple, Hashable + import numpy as np -# random number generation from numpy import random -# high performance graph theory tools written in pure ANSI-C import igraph from ._ext.types import to_cy, ADJ, NODE, FIELD, DEGREE from ._ext.numerics import _randomly_rewire_geomodel_I, \ _randomly_rewire_geomodel_II, _randomly_rewire_geomodel_III -from .network import cached_const from .network import Network from .grid import Grid -# -# Define class SpatialNetwork -# - class SpatialNetwork(Network): """ Encapsulates a spatially embedded network. @@ -48,8 +42,8 @@ class SpatialNetwork(Network): # Definitions of internal methods # - def __init__(self, grid, adjacency=None, edge_list=None, directed=False, - silence_level=0): + def __init__(self, grid: Grid, adjacency=None, edge_list=None, + directed=False, silence_level=0): """ Initialize an instance of SpatialNetwork. @@ -64,29 +58,28 @@ def __init__(self, grid, adjacency=None, edge_list=None, directed=False, directed. :arg int silence_level: The inverse level of verbosity of the object. """ - self.grid = grid + assert isinstance(grid, Grid) + self.grid: Grid = grid """(Grid) - Grid object describing the network's spatial embedding""" # Call constructor of parent class Network Network.__init__(self, adjacency=adjacency, edge_list=edge_list, directed=directed, silence_level=silence_level) + def __cache_state__(self) -> Tuple[Hashable, ...]: + # The following attributes are assumed immutable: + # (grid) + return Network.__cache_state__(self) + + def __rec_cache_state__(self) -> Tuple[object, ...]: + return (self.grid,) + def __str__(self): """ Return a string representation of the SpatialNetwork object. """ return f'SpatialNetwork:\n{Network.__str__(self)}' - def clear_cache(self): - """ - Clean up cache. - - Is reversible, since all cached information can be recalculated from - basic data. - """ - Network.clear_cache(self) - self.grid.clear_cache() - # # Load and save GeoNetwork object # @@ -203,9 +196,8 @@ def Load(filename, fileformat=None, silence_level=0, *args, **kwds): # Overwrite igraph Graph object in Network instance to restore link # attributes/weights net.graph = graph - # Restore link attributes/weights - net.clear_paths_cache() - + # invalidate cache + net._mut_la += 1 return net @staticmethod @@ -669,13 +661,13 @@ def max_link_distance(self): # Link weighted network measures # - @cached_const('base', 'distance') def distance(self): """ Return the distance matrix. """ dist = self.grid.distance() - self.set_link_attribute('distance', dist) + if not self.find_link_attribute('distance'): + self.set_link_attribute('distance', dist) return dist def average_distance_weighted_path_length(self): diff --git a/src/pyunicorn/eventseries/event_series.py b/src/pyunicorn/eventseries/event_series.py index e13dca7..6504a4b 100644 --- a/src/pyunicorn/eventseries/event_series.py +++ b/src/pyunicorn/eventseries/event_series.py @@ -25,19 +25,17 @@ point processes as a null model (for ECA only) or a Monte Carlo approach. """ -# -# Imports -# +from typing import Tuple, Hashable + import warnings import numpy as np - from scipy import stats -from .. 
import cached_const +from ..core.cache import Cached -class EventSeries: +class EventSeries(Cached): def __init__(self, data, timestamps=None, taumax=np.inf, lag=0.0, threshold_method=None, threshold_values=None, @@ -145,10 +143,6 @@ def __init__(self, data, timestamps=None, taumax=np.inf, lag=0.0, NrOfEvs = np.array(np.sum(self.__eventmatrix, axis=0), dtype=int) self.__nrofevents = NrOfEvs - # Dictionary for chached constants - self.cache = {'base': {}} - """(dict) cache of re-usable computation results""" - # Dictionary of symmetrization functions for later use self.symmetrization_options = { 'directed': EventSeries._symmetrization_directed, @@ -159,6 +153,11 @@ def __init__(self, data, timestamps=None, taumax=np.inf, lag=0.0, 'min': EventSeries._symmetrization_min } + def __cache_state__(self) -> Tuple[Hashable, ...]: + # The following attributes are assumed immutable: + # (__eventmatrix, __timestamps, __taumax, __lag) + return () + def __str__(self): """ Return a string representation of the EventSeries object. @@ -596,16 +595,15 @@ def _eca_coincidence_rate(self, eventseriesx, eventseriesy, :type ts2: 1D Numpy array :arg ts2: Event time array containing time points when events of event series 2 occur, not obligatory - :type window_type: str {'retarded', 'advanced', 'symmetric'} - :arg window_type: Only for ECA. Determines if precursor coincidence - rate ('advanced'), trigger coincidence rate - ('retarded') or a general coincidence rate with the - symmetric interval [-taumax, taumax] are computed - ('symmetric'). Default: 'symmetric' - :rtype list - :return [Precursor coincidence rate XY, Precursor coincidence rate YX] + :type window_type: str {'retarded', 'advanced', 'symmetric'} + :arg window_type: Only for ECA. Determines if precursor coincidence + rate ('advanced'), trigger coincidence rate + ('retarded') or a general coincidence rate with the + symmetric interval [-taumax, taumax] are computed + ('symmetric'). 
Default: 'symmetric' + :rtype: list + :return: Precursor coincidence rates [XY, YX] """ - # Get time indices if ts1 is None: e1 = np.where(eventseriesx)[0] @@ -769,7 +767,7 @@ def event_series_analysis(self, method='ES', symmetrization='directed', # Use symmetrization functions for symmetrization and return result return self.symmetrization_options[symmetrization](directedESMatrix) - @cached_const('base', 'directedES') + @Cached.method() def _ndim_event_synchronization(self): """ Compute NxN event synchronization matrix [i,j] with event @@ -839,7 +837,7 @@ def _empirical_percentiles(self, method=None, n_surr=1000, :type n_surr: int :arg n_surr: number of surrogates for Monte-Carlo method :type symmetrization: str {'directed', 'symmetric', 'antisym', - 'mean', 'max', 'min'} for ES, + 'mean', 'max', 'min'} for ES, str {'directed', 'mean', 'max', 'min'} for ECA :arg symmetrization: determines if and which symmetrisation method should be used for the ES/ECA score matrix diff --git a/src/pyunicorn/timeseries/_ext/numerics.pyx b/src/pyunicorn/timeseries/_ext/numerics.pyx index 8300440..a0368ee 100644 --- a/src/pyunicorn/timeseries/_ext/numerics.pyx +++ b/src/pyunicorn/timeseries/_ext/numerics.pyx @@ -624,7 +624,7 @@ def _twin_surrogates_r(int n_surrogates, int N, int dim, twins, # parameters for `_line_dist()` ctypedef int (*line_type_i2J) (int, int) ctypedef int (*line_type_ij2I)(int, int, int) -ctypedef FIELD_t (*metric_type)(int, int, int, FIELD_t[:,:]) +ctypedef DFIELD_t (*metric_type)(int, int, int, DFIELD_t[:,:]) cdef: inline int i2J_vertline(int i, int N): return N @@ -632,10 +632,10 @@ cdef: inline int ij2I_vertline(int i, int j, int N): return i inline int ij2I_diagline(int i, int j, int N): return N - i + j metric_type metric_null = NULL - inline FIELD_t metric_supremum(int I, int j, int dim, FIELD_t[:,:] E): + inline DFIELD_t metric_supremum(int I, int j, int dim, DFIELD_t[:,:] E): cdef: int l - FIELD_t diff = 0, tmp_diff + DFIELD_t diff = 0, tmp_diff for l in range(dim): tmp_diff = abs(E[I, l] - E[j, l]) if tmp_diff > diff: @@ -645,7 +645,7 @@ cdef: cdef void _line_dist( int n_time, ndarray[NODE_t, ndim=1] hist, - ndarray[LAG_t, ndim=2] R, ndarray[FIELD_t, ndim=2] E, float eps, int dim, + ndarray[LAG_t, ndim=2] R, ndarray[DFIELD_t, ndim=2] E, float eps, int dim, metric_type metric, bint black, ndarray[MASK_t, ndim=1, cast=True] M, bint missing_values, line_type_i2J i2J, line_type_ij2I ij2I, bint skip_main): @@ -710,7 +710,7 @@ cdef void _line_dist( def _vertline_dist( int n_time, ndarray[NODE_t, ndim=1] hist, ndarray[LAG_t, ndim=2] R): cdef: - ndarray[FIELD_t, ndim=2] E_null = np.array([[]], dtype=FIELD) + ndarray[DFIELD_t, ndim=2] E_null = np.array([[]], dtype=DFIELD) ndarray[MASK_t, ndim=1] M_null = np.array([], dtype=MASK) _line_dist( n_time, hist, R, E_null, 0, 0, metric_null, True, M_null, False, @@ -719,7 +719,7 @@ def _vertline_dist( def _diagline_dist( int n_time, ndarray[NODE_t, ndim=1] hist, ndarray[LAG_t, ndim=2] R): cdef: - ndarray[FIELD_t, ndim=2] E_null = np.array([[]], dtype=FIELD) + ndarray[DFIELD_t, ndim=2] E_null = np.array([[]], dtype=DFIELD) ndarray[MASK_t, ndim=1] M_null = np.array([], dtype=MASK) _line_dist( n_time, hist, R, E_null, 0, 0, metric_null, True, M_null, False, @@ -728,7 +728,7 @@ def _diagline_dist( def _white_vertline_dist( int n_time, ndarray[NODE_t, ndim=1] hist, ndarray[LAG_t, ndim=2] R): cdef: - ndarray[FIELD_t, ndim=2] E_null = np.array([[]], dtype=FIELD) + ndarray[DFIELD_t, ndim=2] E_null = np.array([[]], dtype=DFIELD) ndarray[MASK_t, ndim=1] 
M_null = np.array([], dtype=MASK) _line_dist( n_time, hist, R, E_null, 0, 0, metric_null, False, M_null, False, @@ -736,7 +736,7 @@ def _white_vertline_dist( def _vertline_dist_sequential( int n_time, ndarray[NODE_t, ndim=1] hist, - ndarray[FIELD_t, ndim=2] E, float eps, int dim): + ndarray[DFIELD_t, ndim=2] E, float eps, int dim): cdef: ndarray[LAG_t, ndim=2] null_R = np.array([[]], dtype=LAG) ndarray[MASK_t, ndim=1] M_null = np.array([], dtype=MASK) @@ -746,7 +746,7 @@ def _vertline_dist_sequential( def _diagline_dist_sequential( int n_time, ndarray[NODE_t, ndim=1] hist, - ndarray[FIELD_t, ndim=2] E, float eps, int dim): + ndarray[DFIELD_t, ndim=2] E, float eps, int dim): cdef: ndarray[LAG_t, ndim=2] null_R = np.array([[]], dtype=LAG) ndarray[MASK_t, ndim=1] M_null = np.array([], dtype=MASK) @@ -758,7 +758,7 @@ def _vertline_dist_missingvalues( int n_time, ndarray[NODE_t, ndim=1] hist, ndarray[LAG_t, ndim=2] R, ndarray[MASK_t, ndim=1, cast=True] M): cdef: - ndarray[FIELD_t, ndim=2] E_null = np.array([[]], dtype=FIELD) + ndarray[DFIELD_t, ndim=2] E_null = np.array([[]], dtype=DFIELD) _line_dist( n_time, hist, R, E_null, 0, 0, metric_null, True, M, True, i2J_vertline, ij2I_vertline, False) @@ -767,7 +767,7 @@ def _diagline_dist_missingvalues( int n_time, ndarray[NODE_t, ndim=1] hist, ndarray[LAG_t, ndim=2] R, ndarray[MASK_t, ndim=1, cast=True] M): cdef: - ndarray[FIELD_t, ndim=2] E_null = np.array([[]], dtype=FIELD) + ndarray[DFIELD_t, ndim=2] E_null = np.array([[]], dtype=DFIELD) _line_dist( n_time, hist, R, E_null, 0, 0, metric_null, True, M, True, i2J_diagline, ij2I_diagline, True) @@ -775,7 +775,7 @@ def _diagline_dist_missingvalues( def _vertline_dist_sequential_missingvalues( int n_time, ndarray[NODE_t, ndim=1] hist, ndarray[MASK_t, ndim=1, cast=True] M, - ndarray[FIELD_t, ndim=2] E, float eps, int dim): + ndarray[DFIELD_t, ndim=2] E, float eps, int dim): cdef: ndarray[LAG_t, ndim=2] null_R = np.array([[]], dtype=LAG) _line_dist( @@ -785,7 +785,7 @@ def _vertline_dist_sequential_missingvalues( def _diagline_dist_sequential_missingvalues( int n_time, ndarray[NODE_t, ndim=1] hist, ndarray[MASK_t, ndim=1, cast=True] M, - ndarray[FIELD_t, ndim=2] E, float eps, int dim): + ndarray[DFIELD_t, ndim=2] E, float eps, int dim): cdef: ndarray[LAG_t, ndim=2] null_R = np.array([[]], dtype=LAG) _line_dist( diff --git a/src/pyunicorn/timeseries/cross_recurrence_plot.py b/src/pyunicorn/timeseries/cross_recurrence_plot.py index de1bc29..aab664f 100644 --- a/src/pyunicorn/timeseries/cross_recurrence_plot.py +++ b/src/pyunicorn/timeseries/cross_recurrence_plot.py @@ -18,22 +18,18 @@ analysis (RQA) and recurrence network analysis. """ -# array object and fast numerics +from typing import Tuple, Hashable + import numpy as np +from ..core.cache import Cached +from .recurrence_plot import RecurrencePlot from ..core._ext.types import to_cy, DFIELD from ._ext.numerics import _manhattan_distance_matrix_crp, \ _euclidean_distance_matrix_crp, _supremum_distance_matrix_crp -from .recurrence_plot import RecurrencePlot - -# -# Class definitions -# - class CrossRecurrencePlot(RecurrencePlot): - """ Class CrossRecurrencePlot for generating and quantitatively analyzing :index:`cross recurrence plots `. 
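
The hunks below route every write to the embedded trajectories through property setters that bump `_mut_embedding`, which `__cache_state__()` then exposes as the cache key. The pattern in isolation, as a minimal sketch rather than code from this patch:

    import numpy as np

    class Trajectory:
        def __init__(self, embedding):
            self._mut_embedding = 0    # mutation counter
            self.embedding = embedding

        @property
        def embedding(self):
            return self._embedding

        @embedding.setter
        def embedding(self, value):
            self._embedding = np.asarray(value, dtype=float)
            self._mut_embedding += 1   # invalidates dependent entries

        def __cache_state__(self):
            return (self._mut_embedding,)

Reassigning the trajectory therefore needs no explicit cache clearing: the next cached lookup simply misses.
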
@@ -137,12 +133,11 @@ def __init__(self, x, y, metric="supremum", normalize=False, dim = kwds.get("dim") tau = kwds.get("tau") - if dim is not None and tau is not None: + self._mut_embedding: int = 0 + if (dim is not None) and (tau is not None): # Embed the time series self.x_embedded = self.embed_time_series(self.x, dim, tau) - """The embedded time series x.""" self.y_embedded = self.embed_time_series(self.y, dim, tau) - """The embedded time series y.""" else: self.x_embedded = self.x self.y_embedded = self.y @@ -160,6 +155,9 @@ def __init__(self, x, y, metric="supremum", normalize=False, raise NameError("Please give either threshold or recurrence_rate \ to construct the cross recurrence plot!") + def __cache_state__(self) -> Tuple[Hashable, ...]: + return (self._mut_embedding,) + def __str__(self): """ Returns a string representation. @@ -169,6 +167,30 @@ def __str__(self): f"Embedding dimension {self.dim if self.dim else 0}\n" f"Threshold {self.threshold}, {self.metric} metric") + @property + def x_embedded(self) -> np.ndarray: + """ + The embedded time series x. + """ + return self._x_embedded + + @x_embedded.setter + def x_embedded(self, embedding: np.ndarray): + self._x_embedded = to_cy(embedding, DFIELD) + self._mut_embedding += 1 + + @property + def y_embedded(self) -> np.ndarray: + """ + The embedded time series y. + """ + return self._y_embedded + + @y_embedded.setter + def y_embedded(self, embedding: np.ndarray): + self._y_embedded = to_cy(embedding, DFIELD) + self._mut_embedding += 1 + # # Service methods # @@ -182,35 +204,25 @@ def recurrence_matrix(self): """ return self.CR - def distance_matrix(self, x_embedded, y_embedded, metric): + def distance_matrix(self, metric): """ Return phase space cross distance matrix :math:`D` according to the chosen metric. - :type x_embedded: 2D array (time, embedding dimension) - :arg x_embedded: The phase space trajectory x. - :type y_embedded: 2D array (time, embedding dimension) - :arg y_embedded: The phase space trajectory y. :arg str metric: The metric for measuring distances in phase space ("manhattan", "euclidean", "supremum"). :rtype: 2D square array :return: the phase space cross distance matrix :math:`D` """ - # Return distance matrix according to chosen metric: - if metric == "manhattan": - return self.manhattan_distance_matrix(x_embedded, y_embedded) - elif metric == "euclidean": - return self.euclidean_distance_matrix(x_embedded, y_embedded) - elif metric == "supremum": - return self.supremum_distance_matrix(x_embedded, y_embedded) - else: - return None + assert metric in self._known_metrics, f"unknown metric: {metric}" + return getattr(self, f"{metric}_distance_matrix")() # # Calculate recurrence plot # - def manhattan_distance_matrix(self, x_embedded, y_embedded): + @Cached.method(name="the manhattan distance matrix") + def manhattan_distance_matrix(self): """ Return the manhattan distance matrix from two (embedded) time series. @@ -221,63 +233,39 @@ def manhattan_distance_matrix(self, x_embedded, y_embedded): :rtype: 2D rectangular Numpy array :return: the manhattan distance matrix. 
""" - if self.silence_level <= 1: - print("Calculating the manhattan distance matrix...") - - x_embedded = to_cy(x_embedded, DFIELD) - y_embedded = to_cy(y_embedded, DFIELD) - ntime_x = x_embedded.shape[0] - ntime_y = y_embedded.shape[0] - dim = x_embedded.shape[1] - + ntime_x = self.x_embedded.shape[0] + ntime_y = self.y_embedded.shape[0] + dim = self.x_embedded.shape[1] return _manhattan_distance_matrix_crp(ntime_x, ntime_y, dim, - x_embedded, y_embedded) + self.x_embedded, self.y_embedded) - def euclidean_distance_matrix(self, x_embedded, y_embedded): + @Cached.method(name="the euclidean distance matrix") + def euclidean_distance_matrix(self): """ Return the euclidean distance matrix from two (embedded) time series. - :type x_embedded: 2D Numpy array (time, embedding dimension) - :arg x_embedded: The phase space trajectory x. - :type y_embedded: 2D Numpy array (time, embedding dimension) - :arg y_embedded: The phase space trajectory y. :rtype: 2D rectangular Numpy array :return: the euclidean distance matrix. """ - if self.silence_level <= 1: - print("Calculating the euclidean distance matrix...") - - x_embedded = to_cy(x_embedded, DFIELD) - y_embedded = to_cy(y_embedded, DFIELD) - ntime_x = x_embedded.shape[0] - ntime_y = y_embedded.shape[0] - dim = x_embedded.shape[1] - + ntime_x = self.x_embedded.shape[0] + ntime_y = self.y_embedded.shape[0] + dim = self.x_embedded.shape[1] return _euclidean_distance_matrix_crp(ntime_x, ntime_y, dim, - x_embedded, y_embedded) + self.x_embedded, self.y_embedded) - def supremum_distance_matrix(self, x_embedded, y_embedded): + @Cached.method(name="the supremum distance matrix") + def supremum_distance_matrix(self): """ Return the supremum distance matrix from two (embedded) time series. - :type x_embedded: 2D Numpy array (time, embedding dimension) - :arg x_embedded: The phase space trajectory x. - :type y_embedded: 2D Numpy array (time, embedding dimension) - :arg y_embedded: The phase space trajectory y. :rtype: 2D rectangular Numpy array :return: the supremum distance matrix. 
""" - if self.silence_level <= 1: - print("Calculating the supremum distance matrix...") - - x_embedded = to_cy(x_embedded, DFIELD) - y_embedded = to_cy(y_embedded, DFIELD) - ntime_x = x_embedded.shape[0] - ntime_y = y_embedded.shape[0] - dim = x_embedded.shape[1] - + ntime_x = self.x_embedded.shape[0] + ntime_y = self.y_embedded.shape[0] + dim = self.x_embedded.shape[1] return _supremum_distance_matrix_crp(ntime_x, ntime_y, dim, - x_embedded, y_embedded) + self.x_embedded, self.y_embedded) def set_fixed_threshold(self, threshold): """ @@ -291,21 +279,10 @@ def set_fixed_threshold(self, threshold): if self.silence_level <= 1: print("Calculating cross recurrence plot at fixed threshold...") - # Get distance matrix, according to self.metric - distance = self.distance_matrix(self.x_embedded, self.y_embedded, - self.metric) - # Get length of time series x and y + distance = self.distance_matrix(self.metric) (N, M) = distance.shape - - # Initialize recurrence matrix recurrence = np.zeros((N, M), dtype="int8") - - # Thresholding the distance matrix recurrence[distance < threshold] = 1 - - # Clean up - del distance - self.CR = recurrence self.N = N self.M = M @@ -323,26 +300,12 @@ def set_fixed_recurrence_rate(self, recurrence_rate): print("Calculating cross recurrence plot at " "fixed recurrence rate...") - # Get distance matrix, according to self.metric - distance = self.distance_matrix(self.x_embedded, self.y_embedded, - self.metric) - - # Get length of time series x and y + distance = self.distance_matrix(self.metric) (N, M) = distance.shape - - # Get threshold to obtain fixed recurrence rate threshold = self.threshold_from_recurrence_rate(distance, recurrence_rate) - - # Initialize recurrence matrix recurrence = np.zeros((N, M), dtype="int8") - - # Thresholding the distance matrix recurrence[distance < threshold] = 1 - - # Clean up - del distance - self.CR = recurrence self.N = N self.M = M diff --git a/src/pyunicorn/timeseries/joint_recurrence_plot.py b/src/pyunicorn/timeseries/joint_recurrence_plot.py index 5909e5a..268df86 100644 --- a/src/pyunicorn/timeseries/joint_recurrence_plot.py +++ b/src/pyunicorn/timeseries/joint_recurrence_plot.py @@ -18,20 +18,11 @@ analysis (RQA) and recurrence network analysis. 
""" -# -# Import essential packages -# - -# array object and fast numerics import numpy as np from .recurrence_plot import RecurrencePlot -# -# Class definitions -# - class JointRecurrencePlot(RecurrencePlot): """ @@ -139,7 +130,6 @@ def __init__(self, x, y, metric=("supremum", "supremum"), # Store type of metric self.metric = metric - self._distance_matrix_cached = False self.JR = None """The joint recurrence matrix.""" self.N = 0 @@ -261,33 +251,18 @@ def set_fixed_threshold(self, threshold): if self.silence_level <= 1: print("Calculating joint recurrence plot at fixed threshold...") - # Disable caching of distances in RecurrencePlot class - self._distance_matrix_cached = False - # Get distance matrix for the first time series - distance = self.distance_matrix(self.x_embedded, self.metric[0]) - - # Get length of time series + self.embedding = self.x_embedded + distance = self.distance_matrix(self.metric[0]) N = distance.shape[0] - - # Initialize first recurrence matrix recurrence_x = np.zeros((N, N), dtype="int8") - - # Thresholding the first distance matrix recurrence_x[distance < threshold[0]] = 1 - - # Clean up del distance - # Disable caching of distances in RecurrencePlot class - self._distance_matrix_cached = False - # Get distance matrix for the second time series - distance = self.distance_matrix(self.y_embedded, self.metric[1]) - - # Initialize second recurrence matrix + self.embedding = self.y_embedded + distance = self.distance_matrix(self.metric[1]) recurrence_y = np.zeros((N, N), dtype="int8") - - # Thresholding the second distance matrix recurrence_y[distance < threshold[1]] = 1 + del distance if self.lag >= 0: self.JR = recurrence_x[:N-self.lag, :N-self.lag] * \ @@ -299,9 +274,6 @@ def set_fixed_threshold(self, threshold): recurrence_x[-self.lag:N, -self.lag:N] self.N = N - # Clean up - del distance, recurrence_x, recurrence_y - def set_fixed_threshold_std(self, threshold_std): """ Set the joint recurrence plot to fixed thresholds in units of the @@ -318,11 +290,8 @@ def set_fixed_threshold_std(self, threshold_std): print("Calculating recurrence plot at fixed threshold " "in units of time series STD...") - # Get absolute threshold threshold_x = threshold_std[0] * self.x.std() threshold_y = threshold_std[1] * self.y.std() - - # Call set fixed threshold method JointRecurrencePlot.\ set_fixed_threshold(self, (threshold_x, threshold_y)) @@ -341,41 +310,22 @@ def set_fixed_recurrence_rate(self, recurrence_rate): print("Calculating joint recurrence plot at " "fixed recurrence rate...") - # Disable caching of distances in RecurrencePlot class - self._distance_matrix_cached = False - # Get distance matrix for the first time series - distance = self.distance_matrix(self.x_embedded, self.metric[0]) - - # Get length of time series + self.embedding = self.x_embedded + distance = self.distance_matrix(self.metric[0]) N = distance.shape[0] - - # Get first threshold to obtain fixed recurrence rate threshold_x = self.\ threshold_from_recurrence_rate(distance, recurrence_rate[0]) - - # Initialize recurrence matrix recurrence_x = np.zeros((N, N), dtype="int8") - - # Thresholding the distance matrix recurrence_x[distance < threshold_x] = 1 - - # Clean up del distance - # Disable caching of distances in RecurrencePlot class - self._distance_matrix_cached = False - # Get distance matrix for the second time series - distance = self.distance_matrix(self.y_embedded, self.metric[1]) - - # Get first threshold to obtain fixed recurrence rate + self.embedding = self.y_embedded + distance = 
self.distance_matrix(self.metric[1]) threshold_y = self.\ threshold_from_recurrence_rate(distance, recurrence_rate[1]) - - # Initialize recurrence matrix recurrence_y = np.zeros((N, N), dtype="int8") - - # Thresholding the distance matrix recurrence_y[distance < threshold_y] = 1 + del distance if self.lag >= 0: self.JR = recurrence_x[:N-self.lag, :N-self.lag] * \ @@ -386,6 +336,3 @@ def set_fixed_recurrence_rate(self, recurrence_rate): self.JR = recurrence_y[:N+self.lag, :N+self.lag] * \ recurrence_x[-self.lag:N, -self.lag:N] self.N = N - - # Clean up - del distance, recurrence_x, recurrence_y diff --git a/src/pyunicorn/timeseries/recurrence_plot.py b/src/pyunicorn/timeseries/recurrence_plot.py index b9f9668..d099f41 100644 --- a/src/pyunicorn/timeseries/recurrence_plot.py +++ b/src/pyunicorn/timeseries/recurrence_plot.py @@ -19,12 +19,12 @@ """ from math import factorial +from typing import Tuple, Hashable -# array object and fast numerics import numpy as np from numpy.typing import NDArray -# Cython inline code +from ..core.cache import Cached from ..core._ext.types import to_cy, NODE, LAG, FIELD, DFIELD from ._ext.numerics import _embed_time_series, _manhattan_distance_matrix_rp, \ _euclidean_distance_matrix_rp, _supremum_distance_matrix_rp, \ @@ -38,13 +38,7 @@ _rejection_sampling, _white_vertline_dist, _twins_r, _twin_surrogates_r -# -# Class definitions -# - - -class RecurrencePlot: - +class RecurrencePlot(Cached): """ Class RecurrencePlot for generating and quantitatively analyzing :index:`recurrence plots `. @@ -153,7 +147,8 @@ def __init__(self, time_series: NDArray, metric: str = "supremum", self.time_series.shape = (self.time_series.shape[0], -1) # Store type of metric - assert metric in ("manhattan", "euclidean", "supremum") + self._known_metrics = ("manhattan", "euclidean", "supremum") + assert metric in self._known_metrics self.metric = metric """The metric used for measuring distances in phase space.""" @@ -168,19 +163,19 @@ def __init__(self, time_series: NDArray, metric: str = "supremum", self.dim = kwargs.get("dim") self.tau = kwargs.get("tau") - if self.dim is not None and self.tau is not None: - # Embed the time series - self.embedding = self.embed_time_series(self.time_series, self.dim, - self.tau) - """The embedded time series.""" - else: - self.embedding = self.time_series - - self.N = self.embedding.shape[0] + self.N: int = 0 """The number of state vectors (number of lines and rows) of the RP.""" self.R = None """The recurrence matrix.""" + self._mut_embedding: int = 0 + if (self.dim is not None) and (self.tau is not None): + # Embed the time series + self.embedding = self.embed_time_series( + self.time_series, self.dim, self.tau) + else: + self.embedding = self.time_series + # Get missing value indices if self.missing_values: self.missing_value_indices = \ @@ -196,14 +191,6 @@ def __init__(self, time_series: NDArray, metric: str = "supremum", self.adaptive_neighborhood_size = \ kwargs.get("adaptive_neighborhood_size") - # Initialize cache - self._distance_matrix_cached = False - self._distance_matrix = None - self._diagline_dist_cached = False - self._diagline_dist = None - self._vertline_dist_cached = False - self._vertline_dist = None - # Precompute recurrence matrix only if sequential RQA is switched off, # and not calling from child class with respective overriding methods. 
skip_recurrence = kwargs.get("skip_recurrence") @@ -238,6 +225,9 @@ def __init__(self, time_series: NDArray, metric: str = "supremum", recurrence_rate to construct the recurrence \ plot!") + def __cache_state__(self) -> Tuple[Hashable, ...]: + return (self._mut_embedding,) + def __str__(self): """ Returns a string representation. @@ -247,18 +237,19 @@ def __str__(self): f"Embedding dimension {self.dim if self.dim else 0}\n" f"Threshold {self.threshold}, {self.metric} metric") - def clear_cache(self, irreversible=False): - """Clean up memory.""" - if irreversible: - if self._distance_matrix_cached: - del self._distance_matrix - self._distance_matrix_cached = False - if self._diagline_dist_cached: - del self._diagline_dist - self._diagline_dist_cached = False - if self._vertline_dist_cached: - del self._vertline_dist - self._vertline_dist_cached = False + @property + def embedding(self) -> np.ndarray: + """ + The embedded time series / phase space trajectory + (time, embedding dimension). + """ + return self._embedding + + @embedding.setter + def embedding(self, embedding: np.ndarray): + self._embedding = to_cy(embedding, DFIELD) + self.N = self._embedding.shape[0] + self._mut_embedding += 1 # # Service methods @@ -278,35 +269,18 @@ def recurrence_matrix(self): "Recurrence matrix is not stored in memory.") return None - def distance_matrix(self, embedding, metric): + def distance_matrix(self, metric: str): """ Return phase space distance matrix :math:`D` according to the chosen metric. - :type embedding: 2D array (time, embedding dimension) - :arg embedding: The phase space trajectory. :arg str metric: The metric for measuring distances in phase space ("manhattan", "euclidean", "supremum"). :rtype: 2D square array :return: the phase space distance matrix :math:`D` """ - if not self._distance_matrix_cached: - # Return distance matrix according to chosen metric: - if metric == "manhattan": - self._distance_matrix = \ - RecurrencePlot.manhattan_distance_matrix(self, embedding) - elif metric == "euclidean": - self._distance_matrix = \ - RecurrencePlot.euclidean_distance_matrix(self, embedding) - elif metric == "supremum": - self._distance_matrix = \ - RecurrencePlot.supremum_distance_matrix(self, embedding) - else: - raise ValueError(f"unknown metric: {metric}") - - self._distance_matrix_cached = True - - return self._distance_matrix + assert metric in self._known_metrics, f"unknown metric: {metric}" + return getattr(RecurrencePlot, f"{metric}_distance_matrix")(self) # # Time series handling methods @@ -535,56 +509,40 @@ def complexity_entropy(self): # Calculate recurrence plot # - def manhattan_distance_matrix(self, embedding): + @Cached.method(name="the manhattan distance matrix") + def manhattan_distance_matrix(self): """ Return the manhattan distance matrix from an embedding of a time series. - :type embedding: 2D array (time, embedding dimension) - :arg embedding: The phase space trajectory. :rtype: 2D square array :return: the manhattan distance matrix. """ - if self.silence_level <= 1: - print("Calculating the manhattan distance matrix...") + (n_time, dim) = self.embedding.shape + return _manhattan_distance_matrix_rp(n_time, dim, self.embedding) - (n_time, dim) = embedding.shape - return _manhattan_distance_matrix_rp(n_time, dim, - to_cy(embedding, DFIELD)) - - def euclidean_distance_matrix(self, embedding): + @Cached.method(name="the euclidean distance matrix") + def euclidean_distance_matrix(self): """ Return the euclidean distance matrix from an embedding of a time series. 
- :type embedding: 2D array (time, embedding dimension) - :arg embedding: The phase space trajectory. :rtype: 2D square array :return: the euclidean distance matrix. """ - if self.silence_level <= 1: - print("Calculating the euclidean distance matrix...") + (n_time, dim) = self.embedding.shape + return _euclidean_distance_matrix_rp(n_time, dim, self.embedding) - (n_time, dim) = embedding.shape - return _euclidean_distance_matrix_rp(n_time, dim, - to_cy(embedding, DFIELD)) - - def supremum_distance_matrix(self, embedding): + @Cached.method(name="the supremum distance matrix") + def supremum_distance_matrix(self): """ Return the supremum distance matrix from an embedding of a time series. - :type embedding: 2D Numpy array (time, embedding dimension) - :arg embedding: The phase space trajectory. - :rtype: 2D square Numpy array :return: the supremum distance matrix. """ - if self.silence_level <= 1: - print("Calculating the supremum distance matrix...") - - (n_time, dim) = embedding.shape - return _supremum_distance_matrix_rp(n_time, dim, - to_cy(embedding, DFIELD)) + (n_time, dim) = self.embedding.shape + return _supremum_distance_matrix_rp(n_time, dim, self.embedding) def set_fixed_threshold(self, threshold): """ @@ -598,20 +556,10 @@ def set_fixed_threshold(self, threshold): if self.silence_level <= 1: print("Calculating recurrence plot at fixed threshold...") - # Get distance matrix, according to self.metric - distance = RecurrencePlot.distance_matrix( - self, self.embedding, self.metric) - - # Get number of nodes + distance = RecurrencePlot.distance_matrix(self, self.metric) n_time = distance.shape[0] - - # Initialize recurrence matrix recurrence = np.zeros((n_time, n_time), dtype="int8") - - # Thresholding the distance matrix recurrence[distance < threshold] = 1 - - # Handle missing values if self.missing_values: # Write missing value lines and rows to recurrence matrix # NaN flag is not supported by int8 data format -> use 0 here @@ -635,10 +583,7 @@ def set_fixed_threshold_std(self, threshold_std): print("Calculating recurrence plot at fixed threshold in units of " "time series STD...") - # Get absolute threshold threshold = threshold_std * self.time_series.std() - - # Call set fixed threshold method RecurrencePlot.set_fixed_threshold(self, threshold) def set_fixed_recurrence_rate(self, recurrence_rate): @@ -653,23 +598,12 @@ def set_fixed_recurrence_rate(self, recurrence_rate): if self.silence_level <= 1: print("Calculating recurrence plot at fixed recurrence rate...") - # Get distance matrix, according to self.metric - distance = RecurrencePlot.distance_matrix( - self, self.embedding, self.metric) - - # Get number of nodes + distance = RecurrencePlot.distance_matrix(self, self.metric) n_time = distance.shape[0] - - # Get threshold to obtain fixed recurrence rate threshold = self.threshold_from_recurrence_rate(distance, recurrence_rate) - - # Initialize recurrence matrix recurrence = np.zeros((n_time, n_time), dtype="int8") - - # Thresholding the distance matrix recurrence[distance < threshold] = 1 - self.R = recurrence def set_fixed_local_recurrence_rate(self, local_recurrence_rate): @@ -689,24 +623,16 @@ def set_fixed_local_recurrence_rate(self, local_recurrence_rate): print("Calculating recurrence plot at fixed " "local recurrence rate...") - # Get distance matrix, according to self.metric - distance = self.distance_matrix(self.embedding, self.metric) - - # Get number of nodes + distance = RecurrencePlot.distance_matrix(self, self.metric) n_time = distance.shape[0] - - # Initialize 
recurrence matrix recurrence = np.zeros((n_time, n_time), dtype="int8") - for i in range(n_time): # Get threshold for state vector i to obtain fixed local # recurrence rate local_threshold = self.threshold_from_recurrence_rate( distance[i, :], local_recurrence_rate) - # Thresholding the distance matrix for column i recurrence[i, distance[i, :] < local_threshold] = 1 - self.R = recurrence def set_adaptive_neighborhood_size(self, adaptive_neighborhood_size, @@ -733,18 +659,14 @@ def set_adaptive_neighborhood_size(self, adaptive_neighborhood_size, print("Calculating recurrence plot using the " "adaptive neighborhood size algorithm...") - # Get distance matrix, according to self.metric - distance = self.distance_matrix(self.embedding, self.metric) + distance = RecurrencePlot.distance_matrix(self, self.metric) # Get indices that would sort the distance matrix. # sorted_neighbors[i,j] contains the index of the jth nearest neighbor # of i. Sorting order is very important here! sorted_neighbors = to_cy(distance.argsort(axis=1), NODE) - # Get number of nodes n_time = distance.shape[0] - - # Initialize recurrence matrix recurrence = np.zeros((n_time, n_time), dtype=LAG) # Set processing order of state vectors @@ -918,6 +840,8 @@ def recurrence_probability(self, lag=0): # RQA measures based on black diagonal lines # + @Cached.method(attrs=( + "metric", "threshold", "missing_values", "sparse_rqa")) def diagline_dist(self): """ Return the :index:`frequency distribution of diagonal line lengths @@ -940,51 +864,45 @@ def diagline_dist(self): :return: the frequency distribution of diagonal line lengths :math:`P(l-1)`. """ - if self._diagline_dist_cached: - return self._diagline_dist - else: - # Prepare - n_time = self.N - diagline = np.zeros(n_time, dtype=NODE) - - if not self.sparse_rqa: - # Get recurrence matrix - recmat = self.recurrence_matrix() - - if self.missing_values: - mv_indices = self.missing_value_indices - _diagline_dist_missingvalues( - n_time, diagline, recmat, mv_indices) - else: - _diagline_dist(n_time, diagline, recmat) - - # Calculations for sequential RQA - elif self.metric == "supremum" and self.threshold is not None: - # Get embedding - embedding = self.embedding - # Get time series dimension - dim = embedding.shape[1] - # Get threshold - eps = float(self.threshold) - - if self.missing_values: - mv_indices = self.missing_value_indices - _diagline_dist_sequential_missingvalues( - n_time, diagline, mv_indices, embedding, eps, dim) - else: - _diagline_dist_sequential( - n_time, diagline, embedding, eps, dim) + # Prepare + n_time = self.N + diagline = np.zeros(n_time, dtype=NODE) + if not self.sparse_rqa: + # Get recurrence matrix + recmat = self.recurrence_matrix() + + if self.missing_values: + mv_indices = self.missing_value_indices + _diagline_dist_missingvalues( + n_time, diagline, recmat, mv_indices) + else: + _diagline_dist(n_time, diagline, recmat) + + # Calculations for sequential RQA + elif self.metric == "supremum" and self.threshold is not None: + # Get embedding + embedding = self.embedding + # Get time series dimension + dim = embedding.shape[1] + # Get threshold + eps = float(self.threshold) + + if self.missing_values: + mv_indices = self.missing_value_indices + _diagline_dist_sequential_missingvalues( + n_time, diagline, mv_indices, embedding, eps, dim) else: - raise NotImplementedError( - "Sequential RQA is currently only available for " - "fixed threshold and the supremum metric.") + _diagline_dist_sequential( + n_time, diagline, embedding, eps, dim) - # Function just 
runs over the upper triangular matrix - self._diagline_dist = 2*diagline - self._diagline_dist_cached = True + else: + raise NotImplementedError( + "Sequential RQA is currently only available for " + "fixed threshold and the supremum metric.") - return self._diagline_dist + # Function just runs over the upper triangular matrix + return 2 * diagline @staticmethod def rejection_sampling(dist, M): @@ -1147,6 +1065,8 @@ def diag_entropy(self, l_min=2, resampled_dist=None): # RQA measures based on black vertical lines # + @Cached.method(attrs=( + "metric", "threshold", "missing_values", "sparse_rqa")) def vertline_dist(self): """ Return the :index:`frequency distribution of vertical line lengths @@ -1162,52 +1082,46 @@ def vertline_dist(self): :return: the frequency distribution of vertical line lengths :math:`P(v-1)`. """ - if self._vertline_dist_cached: - return self._vertline_dist - else: - # Prepare - n_time = self.N - vertline = np.zeros(n_time, dtype=NODE) - - if not self.sparse_rqa: - # Get recurrence matrix - recmat = self.recurrence_matrix() - - if self.missing_values: - mv_indices = self.missing_value_indices - _vertline_dist_missingvalues( - n_time, vertline, recmat, mv_indices) - else: - _vertline_dist(n_time, vertline, recmat) - - # Calculations for sequential RQA - elif self.metric == "supremum" and self.threshold is not None: - # Get embedding - embedding = self.embedding - # Get time series dimension - dim = embedding.shape[1] - # Get threshold - eps = float(self.threshold) - - if self.missing_values: - mv_indices = self.missing_value_indices - _vertline_dist_sequential_missingvalues( - n_time, vertline, mv_indices, embedding, eps, dim) - - else: - _vertline_dist_sequential( - n_time, vertline, embedding, eps, dim) + # Prepare + n_time = self.N + vertline = np.zeros(n_time, dtype=NODE) + if not self.sparse_rqa: + # Get recurrence matrix + recmat = self.recurrence_matrix() + + if self.missing_values: + mv_indices = self.missing_value_indices + _vertline_dist_missingvalues( + n_time, vertline, recmat, mv_indices) else: - raise NotImplementedError( - "Sequential RQA is currently only available for " - "fixed threshold and the supremum metric.") + _vertline_dist(n_time, vertline, recmat) + + # Calculations for sequential RQA + elif self.metric == "supremum" and self.threshold is not None: + # Get embedding + embedding = self.embedding + # Get time series dimension + dim = embedding.shape[1] + # Get threshold + eps = float(self.threshold) + + if self.missing_values: + mv_indices = self.missing_value_indices + _vertline_dist_sequential_missingvalues( + n_time, vertline, mv_indices, embedding, eps, dim) - # Function covers the whole recurrence matrix - self._vertline_dist = vertline - self._vertline_dist_cached = True + else: + _vertline_dist_sequential( + n_time, vertline, embedding, eps, dim) - return self._vertline_dist + else: + raise NotImplementedError( + "Sequential RQA is currently only available for " + "fixed threshold and the supremum metric.") + + # Function covers the whole recurrence matrix + return vertline def resample_vertline_dist(self, M): """ diff --git a/tests/test_core/test_cache.py b/tests/test_core/test_cache.py new file mode 100644 index 0000000..1594b7d --- /dev/null +++ b/tests/test_core/test_cache.py @@ -0,0 +1,321 @@ +# This file is part of pyunicorn. +# Copyright (C) 2008--2024 Jonathan F. 
Donges and pyunicorn authors +# URL: +# License: BSD (3-clause) +# +# Please acknowledge and cite the use of this software and its authors +# when results are used in publications or published elsewhere. +# +# You can use the following reference: +# J.F. Donges, J. Heitzig, B. Beronov, M. Wiedermann, J. Runge, Q.-Y. Feng, +# L. Tupikina, V. Stolbova, R.V. Donner, N. Marwan, H.A. Dijkstra, +# and J. Kurths, "Unified functional network and nonlinear time series analysis +# for complex systems science: The pyunicorn package" + +""" +Consistency checks for the method caching mix-in. +""" + +import pytest +import numpy as np + +from pyunicorn.core.cache import Cached + + +# pylint: disable=disallowed-name +class TestCached: + + class Foo(Cached): + silence_level = 1 + + def __cache_state__(self): + return () + + @Cached.method() + def foo1(self, a: int): + """Foo1""" + return a + + @Cached.method(name="foo2") + def foo2(self, *args): + """Foo2""" + return sum(args, start=0) + + @Cached.method() + def bar(self): + """Bar""" + return True + + class Bar(Cached): + def __init__(self): + self.counter = 0 + + def __cache_state__(self): + return (self.counter,) + + @classmethod + def test_args(cls, capfd: pytest.CaptureFixture): + """ + Dependence on method arguments. + """ + # immutable instances + X = cls.Foo() + + # wrapped method metadata + methods = ["foo1", "foo2", "bar"] + assert all(getattr(X, f).__doc__ == f.capitalize() for f in methods) + + for _ in range(3): + # method calls + X.cache_clear() + m = Cached.lru_params["maxsize"] + + r1, o1 = [], [] + for _ in range(m + 1): + r1.append(X.foo1(1)) + o1.append(cls.Foo.foo1.__wrapped__(X, 1)) + r1.append(X.foo1(1.0)) + assert all(r == r1[0] for r in r1) + assert all(r == o for r, o in zip(r1, o1)) + + r2, o2 = [], [] + for i in range(2 * m): + r2.append(X.foo2(*range(i))) + o2.append(cls.Foo.foo2.__wrapped__(X, *range(i))) + assert (np.diff(r2) == range(2 * m - 1)).all() + assert all(r == o for r, o in zip(r2, o2)) + + r3, o3 = [], [] + for i in range(5): + r3.append(X.bar()) + o3.append(cls.Foo.bar.__wrapped__(X)) + assert all(r and isinstance(r, bool) for r in r3) + assert all(r == o for r, o in zip(r3, o3)) + + # cache lookups + c1, c2, c3 = (getattr(X, f).cache_info() for f in methods) + assert c1.maxsize == c2.maxsize == m + assert (c1.currsize, c1.misses, c1.hits) == (2, 2, m) + assert (c2.currsize, c2.misses, c2.hits) == (m, 2 * m, 0) + assert (c3.currsize, c3.misses, c3.hits) == (1, 1, 4) + + # cache clearing + X.cache_clear(prefix="foo") + c1, c2, c3 = (getattr(X, f).cache_info() for f in methods) + assert (c1.currsize, c1.misses, c1.hits) == (0, 0, 0) + assert (c2.currsize, c2.misses, c2.hits) == (0, 0, 0) + assert (c3.currsize, c3.misses, c3.hits) == (1, 1, 4) + + # logging behaviour + capture = capfd.readouterr() + assert capture.err == "" + assert capture.out.split("\n")[:-1] == [ + "Calculating foo2..." for _ in range(2 * m)] + + @classmethod + def test_instance_immutable(cls): + """ + Dependence on immutable instance attributes. 
+        """
+        # immutable instances
+        X, Y = cls.Foo(), cls.Foo()
+        m = Cached.lru_params["maxsize"]
+
+        # method calls
+        for i in range(m):
+            X.foo1(i)
+            Y.foo1(i)
+        Y.foo1(m-1)
+
+        # cache lookups
+        x, y = (o.foo1.cache_info() for o in [X, Y])
+        assert x == y
+        assert (x.currsize, x.misses, x.hits) == (m, 2 * m, 1)
+
+        # cache clearing
+        X.cache_clear()
+        x, y = (o.foo1.cache_info() for o in [X, Y])
+        assert x == y
+        assert (x.currsize, x.misses, x.hits) == (0, 0, 0)
+
+    @classmethod
+    def test_instance_mutable(cls):
+        """
+        Dependence on mutable instance attributes.
+        """
+        # mutable instances
+        class Baz(cls.Bar):
+            @Cached.method()
+            def baz(self):
+                self.counter += 1
+                return self.counter
+
+        X, Y = Baz(), Baz()
+
+        # method calls
+        m = Cached.lru_params["maxsize"]
+        k, n = 3, m // 2
+        assert k < n < m
+        for _ in range(n):
+            assert X.baz() == Y.baz()
+        Y.counter = 0
+        for _ in range(k):
+            Y.baz()
+
+        # cache lookups
+        x, y = (o.baz.cache_info() for o in [X, Y])
+        assert x == y
+        assert (x.currsize, x.misses, x.hits) == (2 * n, 2 * n, k)
+
+        # cache clearing
+        X.cache_clear()
+        x, y = (o.baz.cache_info() for o in [X, Y])
+        assert x == y
+        assert (x.currsize, x.misses, x.hits) == (0, 0, 0)
+
+    @classmethod
+    def test_instance_rec(cls):
+        """
+        Dependence on owned `Cached` instances.
+        """
+        # mutable instances
+        class BarFoo(cls.Bar):
+            def __init__(self, foo: cls.Foo):
+                self.foo = foo
+                cls.Bar.__init__(self)
+
+            def __rec_cache_state__(self):
+                return (self.foo,)
+
+            @Cached.method(attrs=("foo",))
+            def baz(self, a: int):
+                f = self.foo.foo1(a)
+                self.counter += 1
+                return f
+
+        # method calls
+        m = Cached.lru_params["maxsize"]
+        X = BarFoo(cls.Foo())
+        for i in range(m):
+            assert X.baz(i) == i
+        assert X.counter == m
+        for i in range(m):
+            assert X.baz(i) == i
+        assert X.counter == 2 * m
+
+        # cache lookups
+        x, y = (f.cache_info() for f in [X.baz, X.foo.foo1])
+        assert (x.currsize, x.misses, x.hits) == (m, 2 * m, 0)
+        assert (y.currsize, y.misses, y.hits) == (m, m, m)
+
+        # cache clearing
+        X.cache_clear()
+        x, y = (f.cache_info() for f in [X.baz, X.foo.foo1])
+        assert (x.currsize, x.misses, x.hits) == (0, 0, 0)
+        assert (y.currsize, y.misses, y.hits) == (0, 0, 0)
+
+    @classmethod
+    def test_attributes(cls):
+        """
+        Dependence on method-specific attributes. 
+        """
+        # mutable instances
+        class FooBaz(cls.Foo):
+            def __init__(self):
+                self.secret = 0
+
+            @Cached.method(attrs=("secret",))
+            def baz(self):
+                """FooBaz"""
+                self.secret += 1
+                return self.secret
+
+        class BarBaz(cls.Bar):
+            def __init__(self):
+                cls.Bar.__init__(self)
+                self.secret = 0
+
+            @Cached.method(attrs=("secret",))
+            def baz(self):
+                """BarBaz"""
+                self.counter += 1
+                self.secret += 1
+                return self.counter + self.secret
+
+        X, Y = FooBaz(), BarBaz()
+
+        # wrapped method metadata
+        assert all(o.baz.__doc__ == type(o).__name__ for o in [X, Y])
+
+        # method calls
+        m = Cached.lru_params["maxsize"]
+        k, n = 3, m // 2
+        assert k < n < m
+        for _ in range(n):
+            X.baz()
+            Y.baz()
+        X.secret = 0
+        Y.secret = 0
+        for _ in range(n):
+            X.baz()
+            Y.baz()
+        X.secret = 0
+        Y.secret = 0
+        Y.counter = 0
+        for _ in range(k):
+            X.baz()
+            Y.baz()
+
+        # cache lookups
+        x1, y1 = (o.baz.cache_info() for o in [X, Y])
+        assert (x1.currsize, x1.misses, x1.hits) == (n, n, n + k)
+        assert (y1.currsize, y1.misses, y1.hits) == (2 * n, 2 * n, k)
+
+        # cache clearing
+        X.cache_clear()
+        x2, y2 = (o.baz.cache_info() for o in [X, Y])
+        assert (x2.currsize, x2.misses, x2.hits) == (0, 0, 0)
+        assert y2 == y1
+        Y.cache_clear()
+        x3, y3 = (o.baz.cache_info() for o in [X, Y])
+        assert x3 == x2
+        assert (y3.currsize, y3.misses, y3.hits) == (0, 0, 0)
+
+    @classmethod
+    def test_disable(cls):
+        """
+        Dependence on global switch.
+        """
+        Cached.cache_enable = False
+
+        class Baz(cls.Bar):
+            def __init__(self):
+                cls.Bar.__init__(self)
+                self.undeclared_counter = 0
+
+            @Cached.method()
+            def baz(self):
+                """Baz"""
+                self.undeclared_counter += 1
+
+        Cached.cache_enable = True
+        X = Baz()
+
+        # wrapped method metadata
+        assert X.baz.__doc__ == "Baz"
+
+        # method calls
+        m = Cached.lru_params["maxsize"]
+        for _ in range(2 * m):
+            X.baz()
+
+        # no caching
+        assert X.counter == 0
+        assert X.undeclared_counter == 2 * m
+        assert not hasattr(X.baz, "cache_info")
+
+        # no-op
+        X.cache_clear()
diff --git a/tests/test_timeseries/test_joint_recurrence_plot.py b/tests/test_timeseries/test_joint_recurrence_plot.py
index 5cc4c48..8a0d8b7 100644
--- a/tests/test_timeseries/test_joint_recurrence_plot.py
+++ b/tests/test_timeseries/test_joint_recurrence_plot.py
@@ -29,9 +29,10 @@ def test_recurrence(metric: str, n: int):
     ts = CouplingAnalysis.test_data()[:n, 0]
     jrp = JointRecurrencePlot(ts, ts, threshold=(.1, .1),
                               metric=(metric, metric))
-    dist = {
-        i: jrp.distance_matrix(getattr(jrp, f"{i}_embedded"), metric=metric)
-        for i in "xy"}
+    dist = {}
+    for i in "xy":
+        jrp.embedding = getattr(jrp, f"{i}_embedded")
+        dist[i] = jrp.distance_matrix(metric=metric)
     assert all(d.shape == (n, n) for d in dist.values())
-    assert np.allclose(*dist.values())
+    assert np.allclose(dist["x"], dist["y"])
     assert jrp.recurrence_matrix().shape == (n, n)
diff --git a/tests/test_timeseries/test_timeseries.py b/tests/test_timeseries/test_timeseries.py
index 542a8c3..a64823d 100644
--- a/tests/test_timeseries/test_timeseries.py
+++ b/tests/test_timeseries/test_timeseries.py
@@ -56,8 +56,8 @@ def testCrossRecurrencePlot(thresh, rr, metric: str):
     crp2 = CrossRecurrencePlot(
         x2, y2, threshold=thresh, recurrence_rate=rr, metric=metric)
     # get respective distance matrices
-    dist_1 = crp1.distance_matrix(crp1.x_embedded, crp1.y_embedded, metric)
-    dist_2 = crp2.distance_matrix(crp2.x_embedded, crp2.y_embedded, metric)
+    dist_1 = crp1.distance_matrix(metric)
+    dist_2 = crp2.distance_matrix(metric)
     # get respective recurrence matrices
     CR1 = 
crp1.recurrence_matrix() CR2 = crp2.recurrence_matrix()
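
Usage afterword (illustrative sketch, not part of the diff above): the names
`Signal`, `values` and `_mut_values` below are hypothetical; everything else
follows the `Cached` protocol as specified in `tests/test_core/test_cache.py`:
`__cache_state__()` supplies a hashable fingerprint of mutable state,
`@Cached.method()` memoizes on top of `functools.lru_cache()`, and
`cache_clear()` drops cached results.

    import numpy as np

    from pyunicorn.core.cache import Cached


    class Signal(Cached):
        """Hypothetical consumer of the caching mix-in."""
        silence_level = 1

        def __init__(self, values: np.ndarray):
            self._mut_values = 0   # mutation count, cf. `_mut_embedding`
            self._values = values

        def __cache_state__(self):
            # Hashable fingerprint: a changed counter means cached method
            # results for this instance are no longer reused.
            return (self._mut_values,)

        @property
        def values(self) -> np.ndarray:
            return self._values

        @values.setter
        def values(self, v: np.ndarray):
            self._values = v
            self._mut_values += 1  # invalidate all cached methods

        @Cached.method(name="the demeaned signal")
        def demeaned(self) -> np.ndarray:
            return self._values - self._values.mean()


    s = Signal(np.arange(5.0))
    a = s.demeaned()           # computed: "Calculating the demeaned signal..."
    assert s.demeaned() is a   # cache hit returns the stored object itself
    s.values = np.zeros(5)     # setter bumps the mutation counter
    assert (s.demeaned() == 0).all()   # recomputed against the new data
    s.cache_clear()            # explicit clearing, as exercised in the tests

The mutation counter stands in for hashing the data itself: large arrays are
expensive to fingerprint on every lookup, so a setter-managed integer serves
as a cheap proxy, mirroring `ClimateData._mut_window` and
`RecurrencePlot._mut_embedding` in this patch.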