diff --git a/.ruff.toml b/.ruff.toml
new file mode 100644
index 0000000..b0cb126
--- /dev/null
+++ b/.ruff.toml
@@ -0,0 +1,59 @@
+# In a standalone .ruff.toml, settings appear without the [tool.ruff] prefix.
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "venv",
+]
+
+# Same as Black.
+line-length = 88
+indent-width = 4
+
+# Assume Python 3.8
+target-version = "py38"
+
+[lint]
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
+# McCabe complexity (`C901`) by default.
+select = ["E4", "E7", "E9", "F", "N", "D", "S", "PL", "NPY"]
+ignore = []
+
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+[format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
diff --git a/README.rst b/README.rst
index 7d49209..26bed95 100644
--- a/README.rst
+++ b/README.rst
@@ -1,8 +1,9 @@
 NOnLinear measures for Dynamical Systems (nolds)
 ================================================
 
-.. image:: https://travis-ci.com/CSchoel/nolds.svg?branch=master
-    :target: https://travis-ci.com/CSchoel/nolds
+.. image:: https://github.com/CSchoel/nolds/actions/workflows/ci.yaml/badge.svg
+    :target: https://github.com/CSchoel/nolds/actions/workflows/ci.yaml
+
 .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3814723.svg
     :target: https://doi.org/10.5281/zenodo.3814723
 
@@ -13,9 +14,10 @@
 .. image:: https://api.codacy.com/project/badge/Grade/42c30d253b384e87a86b70c8fa0da6b2
     :target: https://www.codacy.com/app/christopher.schoelzel/nolds?utm_source=github.com&utm_medium=referral&utm_content=CSchoel/nolds&utm_campaign=Badge_Grade
 
-.. image:: https://codecov.io/gh/CSchoel/nolds/branch/master/graph/badge.svg
+.. image:: https://codecov.io/gh/CSchoel/nolds/branch/master/graph/badge.svg?token=Xgr82QKhFi
     :target: https://codecov.io/gh/CSchoel/nolds
 
+
 Nolds is a small numpy-based library that provides an implementation and a
 learning resource for nonlinear measures for dynamical systems based on
 one-dimensional time series. Currently the following measures are implemented:
diff --git a/nolds/measures.py b/nolds/measures.py
index 1e32294..918cffd 100644
--- a/nolds/measures.py
+++ b/nolds/measures.py
@@ -1825,7 +1825,7 @@ def mfhurst_dm(data, qvals=None, max_dists=range(5, 20), detrend=True,
   return [mH, sH]
 
 
-def corr_dim(data, emb_dim, rvals=None, dist=rowwise_euclidean,
+def corr_dim(data, emb_dim, lag=1, rvals=None, dist=rowwise_euclidean,
              fit="RANSAC", debug_plot=False, debug_data=False, plot_file=None):
   """
   Calculates the correlation dimension with the Grassberger-Procaccia algorithm
@@ -1857,9 +1857,12 @@ def corr_dim(data, emb_dim, rvals=None, dist=rowwise_euclidean,
   This version of the algorithm is created for one-dimensional (scalar) time
   series. Therefore, before calculating C(r), a delay embedding of the time
   series is performed to yield emb_dim dimensional vectors
-  Y_i = [X_i, X_(i+1), X_(i+2), ... X_(i+embd_dim-1)]. Choosing a higher
-  value for emb_dim allows to reconstruct higher dimensional dynamics and
-  avoids "systematic errors due to corrections to scaling".
+  Y_i = [X_i, X_(i+1*lag), X_(i+2*lag), ... X_(i+(emb_dim-1)*lag)]. Choosing
+  a higher value for emb_dim allows the reconstruction of higher-dimensional
+  dynamics and avoids "systematic errors due to corrections to scaling". A
+  higher lag helps to avoid overestimating the correlation due to X_i being
+  close to X_(i+1), but it should not be set too high either, because the
+  exponential divergence of chaotic trajectories leads to underestimation.
 
   References:
     .. [cd_1] P. Grassberger and I. Procaccia, “Characterization of strange
@@ -1911,6 +1914,7 @@ def corr_dim(data, emb_dim, rvals=None, dist=rowwise_euclidean,
     ``csums`` are the corresponding log(C(r)) and ``poly`` are the line
     coefficients (``[slope, intercept]``)
   """
+  # TODO determine lag in units of time instead of number of datapoints
   data = np.asarray(data)
 
   # TODO what are good values for r?
@@ -1918,12 +1922,27 @@ def corr_dim(data, emb_dim, rvals=None, dist=rowwise_euclidean,
   if rvals is None:
     sd = np.std(data, ddof=1)
     rvals = logarithmic_r(0.1 * sd, 0.5 * sd, 1.03)
-  n = len(data)
-  orbit = delay_embedding(data, emb_dim, lag=1)
-  dists = np.array([dist(orbit, orbit[i]) for i in range(len(orbit))])
+  orbit = delay_embedding(data, emb_dim, lag=lag)
+  n = len(orbit)
+  dists = np.full((len(orbit), len(orbit)), np.inf, dtype=np.float64)
+  for i in range(len(orbit)):
+    # calculate distances between X_i and X_(i+1), X_(i+2), ..., X_(n-1)
+    # NOTE: Strictly speaking, [cd_1] does not specify that self-matches
+    # should be excluded, but since [cd_2] and [cd_3] only compare i with
+    # j != i or j > i respectively, we assume this was an oversight in [cd_1]
+    # and keep the diagonal at inf so that self-matches are never counted.
+    d = dist(orbit[i+1:], orbit[i])
+    dists[i+1:, i] = d  # fill column i
+    dists[i, i+1:] = d  # fill row i
   csums = []
   for r in rvals:
-    s = 1.0 / (n * (n - 1)) * np.sum(dists < r)
+    # NOTE: Both [cd_1] and [cd_2] use the factor 1/N^2 here. However, since
+    # we only use these values to fit a line in a log-log plot, a
+    # multiplicative constant does not change the result, as it only adds a
+    # constant offset on the y-axis. Also, [cd_3] has a point in that if we
+    # exclude self-matches in the numerator, it makes sense to also exclude
+    # them from the denominator.
+    s = 1.0 / (n * (n - 1)) * np.sum(dists <= r)
     csums.append(s)
   csums = np.array(csums)
   # filter zeros from csums
diff --git a/nolds/test_measures.py b/nolds/test_measures.py
index ee5f675..fcc06e8 100644
--- a/nolds/test_measures.py
+++ b/nolds/test_measures.py
@@ -8,8 +8,8 @@
 
 import numpy as np
 # import internal module to test helping functions
-from . import measures as nolds
-from . import datasets
+from nolds import measures as nolds
+from nolds import datasets
 import unittest
 import warnings
 
@@ -333,7 +333,22 @@ def test_corr_dim(self):
     cd = nolds.corr_dim(data, 4, fit="poly")
     self.assertAlmostEqual(cd, 0.5, delta=0.15)
     # TODO test example for cd > 1
-  def test_corr_dim_logistic(self):
+
+  def test_lorenz(self):
+    # NOTE: The settings of n, discard, lag, emb_dim, and rvals were
+    # determined experimentally to find the smallest dataset that yields the
+    # results reported by Grassberger and Procaccia (1983).
+    discard = 5000
+    n = 5000
+    lag = 5
+    emb_dim = 5
+    data = datasets.lorenz_euler(n + discard, 10, 28, 8/3, start=(1,1,1), dt=0.012)
+    x = data[discard:,1]
+    rvals = nolds.logarithmic_r(1, np.e, 1.1)  # determined experimentally
+    cd = nolds.corr_dim(x, emb_dim, fit="poly", rvals=rvals, lag=lag)
+    self.assertAlmostEqual(cd, 2.05, delta=0.1)
+
+  def test_logistic(self):
     # TODO replicate tests with logistic map from grassberger-procaccia
     pass
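The following standalone sketch is not part of the diff above. It is a rough, illustrative re-implementation of the correlation-sum computation that the patched corr_dim describes: delay embedding with a configurable lag, exclusion of self-matches, and the 1/(N*(N-1)) normalization. The helper names delay_embed and correlation_sums are made up for this example, and the commented-out nolds call at the end assumes the lag parameter introduced by the patch.

# Illustrative sketch (not part of the nolds source): Grassberger-Procaccia
# correlation sums with delay embedding and lag.
import numpy as np


def delay_embed(x, emb_dim, lag=1):
  """Build delay vectors Y_i = [X_i, X_(i+lag), ..., X_(i+(emb_dim-1)*lag)]."""
  x = np.asarray(x, dtype=np.float64)
  m = len(x) - (emb_dim - 1) * lag
  idx = np.arange(m)[:, None] + np.arange(emb_dim)[None, :] * lag
  return x[idx]


def correlation_sums(x, emb_dim, rvals, lag=1):
  """Return C(r) for each r, counting only pairs with i != j."""
  orbit = delay_embed(x, emb_dim, lag=lag)
  n = len(orbit)
  # pairwise euclidean distances; the diagonal (self-matches) stays at inf
  dists = np.full((n, n), np.inf)
  for i in range(n):
    d = np.sqrt(np.sum((orbit[i + 1:] - orbit[i]) ** 2, axis=1))
    dists[i, i + 1:] = d
    dists[i + 1:, i] = d
  # C(r) = #{(i, j), i != j : |Y_i - Y_j| <= r} / (n * (n - 1))
  return np.array([np.sum(dists <= r) / (n * (n - 1)) for r in rvals])


if __name__ == "__main__":
  # toy data: a slightly noisy sine wave, just to exercise the functions
  rng = np.random.default_rng(42)
  x = np.sin(np.linspace(0, 40 * np.pi, 1000))
  x += 0.01 * rng.standard_normal(1000)
  sd = np.std(x, ddof=1)
  rvals = 0.1 * sd * 1.03 ** np.arange(50)
  csums = correlation_sums(x, emb_dim=4, rvals=rvals, lag=5)
  # the correlation dimension is the slope of log(C(r)) over log(r)
  mask = csums > 0
  slope, _ = np.polyfit(np.log(rvals[mask]), np.log(csums[mask]), 1)
  print("estimated correlation dimension:", slope)
  # The equivalent call through nolds (assuming the lag parameter added by
  # the patch above is available) would be:
  #   import nolds
  #   cd = nolds.corr_dim(x, 4, lag=5, rvals=rvals, fit="poly")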