From a3bf2494d52c5ba056ff387c013bf6006093cd74 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 21 Jan 2025 20:45:34 -0700 Subject: [PATCH 1/6] Add quantile benchmark --- asv_bench/benchmarks/reduce.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/asv_bench/benchmarks/reduce.py b/asv_bench/benchmarks/reduce.py index a31da26a..b2504479 100644 --- a/asv_bench/benchmarks/reduce.py +++ b/asv_bench/benchmarks/reduce.py @@ -1,9 +1,11 @@ import numpy as np import pandas as pd +import xarray as xr from asv_runner.benchmarks.mark import parameterize, skip_for_params import flox import flox.aggregations +import flox.xarray N = 3000 funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"] @@ -138,3 +140,20 @@ def setup(self, *args, **kwargs): # self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5)) # self.axis = None # setup_jit() + + +class Quantile: + def setup(self, *args, **kwargs): + shape = (31411, 25, 25, 1) + + time = pd.date_range("2014-01-01", "2099-12-31", freq="D") + self.da = xr.DataArray( + np.random.randn(*shape), + name="pr", + dims=("time", "lat", "lon", "lab"), + coords={"time": time}, + ) + self.rs = self.da.resample(time="YE") + + def time_quantile(self): + flox.xarray.xarray_reduce(self.da, self.rs.encoded.codes, engine="flox", func="quantile", q=0.9) From e92decbbf92600f5736864f9b7886909418ef3b8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 23 Jan 2025 20:39:36 -0700 Subject: [PATCH 2/6] refactor --- asv_bench/benchmarks/cohorts.py | 11 ++--------- asv_bench/benchmarks/helpers.py | 11 +++++++++++ asv_bench/benchmarks/reduce.py | 6 ++++-- 3 files changed, 17 insertions(+), 11 deletions(-) create mode 100644 asv_bench/benchmarks/helpers.py diff --git a/asv_bench/benchmarks/cohorts.py b/asv_bench/benchmarks/cohorts.py index fef47450..8fa841fd 100644 --- a/asv_bench/benchmarks/cohorts.py +++ b/asv_bench/benchmarks/cohorts.py @@ -6,6 +6,8 @@ import flox +from .helpers import codes_for_resampling + class Cohorts: """Time the core reduction function.""" @@ -200,15 +202,6 @@ def setup(self, *args, **kwargs): self.expected = pd.RangeIndex(self.by.max() + 1) -def codes_for_resampling(group_as_index, freq): - s = pd.Series(np.arange(group_as_index.size), group_as_index) - grouped = s.groupby(pd.Grouper(freq=freq)) - first_items = grouped.first() - counts = grouped.count() - codes = np.repeat(np.arange(len(first_items)), counts) - return codes - - class PerfectBlockwiseResampling(Cohorts): """Perfectly chunked for blockwise resampling.""" diff --git a/asv_bench/benchmarks/helpers.py b/asv_bench/benchmarks/helpers.py new file mode 100644 index 00000000..34ca30e9 --- /dev/null +++ b/asv_bench/benchmarks/helpers.py @@ -0,0 +1,11 @@ +import numpy as np +import pandas as pd + + +def codes_for_resampling(group_as_index: pd.Index, freq: str) -> np.ndarray: + s = pd.Series(np.arange(group_as_index.size), group_as_index) + grouped = s.groupby(pd.Grouper(freq=freq)) + first_items = grouped.first() + counts = grouped.count() + codes = np.repeat(np.arange(len(first_items)), counts) + return codes diff --git a/asv_bench/benchmarks/reduce.py b/asv_bench/benchmarks/reduce.py index b2504479..e786f47e 100644 --- a/asv_bench/benchmarks/reduce.py +++ b/asv_bench/benchmarks/reduce.py @@ -7,6 +7,8 @@ import flox.aggregations import flox.xarray +from .helpers import codes_for_resampling + N = 3000 funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"] engines = [ @@ -153,7 +155,7 @@ def setup(self, *args, **kwargs): dims=("time", "lat", "lon", "lab"), coords={"time": time}, ) - self.rs = self.da.resample(time="YE") + self.codes = xr.DataArray(dims="time", data=codes_for_resampling(time, "YE"), name="time") def time_quantile(self): - flox.xarray.xarray_reduce(self.da, self.rs.encoded.codes, engine="flox", func="quantile", q=0.9) + flox.xarray.xarray_reduce(self.da, self.codes, engine="flox", func="quantile", q=0.9) From 421d9a1972a3a096dffa134fb1d431ff8ed5adf7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 20 Jan 2025 10:11:13 -0700 Subject: [PATCH 3/6] fix rtd --- readthedocs.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/readthedocs.yml b/readthedocs.yml index 51b6b6b1..b42bd07c 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,5 +1,9 @@ version: 2 +sphinx: + # Path to your Sphinx configuration file. + configuration: docs/source/conf.py + build: os: "ubuntu-lts-latest" tools: From 64b1b8bcb28cfd6680cfb8edf31d9a2d69d4b1f8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 23 Jan 2025 20:48:46 -0700 Subject: [PATCH 4/6] [skip-ci] verbose mode --- .github/workflows/benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index e3930b86..d10684f1 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -54,7 +54,7 @@ jobs: echo "Baseline: $LAST_HEAD_SHA ($BASE_LABEL)" echo "Contender: ${GITHUB_SHA} ($HEAD_LABEL)" # Run benchmarks for current commit against base - ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" + ASV_OPTIONS="-v --split --show-stderr --factor $ASV_FACTOR" asv continuous $ASV_OPTIONS $BASE_SHA ${GITHUB_SHA} \ | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ | tee benchmarks.log From 4e42c30f8875a3abbc5b90b9250b05abe719b435 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 23 Jan 2025 20:52:07 -0700 Subject: [PATCH 5/6] [skip-ci] custom install command --- asv_bench/asv.conf.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 9178209d..41a76c99 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -21,7 +21,6 @@ // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // - // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"], // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], // "build_command": [ // "python setup.py build", @@ -32,6 +31,9 @@ "python setup.py build", "python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}" ], + "install_command": [ + "in-dir={env_dir} python -mpip install {wheel_file} --no-deps" + ], // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). From a1e258ad36f33313b3737a442f39ea174821b8f8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 23 Jan 2025 21:00:04 -0700 Subject: [PATCH 6/6] [skip-ci] Revert "[skip-ci] verbose mode" This reverts commit 64b1b8bcb28cfd6680cfb8edf31d9a2d69d4b1f8. --- .github/workflows/benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index d10684f1..e3930b86 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -54,7 +54,7 @@ jobs: echo "Baseline: $LAST_HEAD_SHA ($BASE_LABEL)" echo "Contender: ${GITHUB_SHA} ($HEAD_LABEL)" # Run benchmarks for current commit against base - ASV_OPTIONS="-v --split --show-stderr --factor $ASV_FACTOR" + ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" asv continuous $ASV_OPTIONS $BASE_SHA ${GITHUB_SHA} \ | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ | tee benchmarks.log