Skip to content

Commit

Permalink
Add quantile benchmark (#418)
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian authored Jan 24, 2025
1 parent e2aa2be commit ac319cc
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 10 deletions.
4 changes: 3 additions & 1 deletion asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
// Customizable commands for building, installing, and
// uninstalling the project. See asv.conf.json documentation.
//
// "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
// "build_command": [
// "python setup.py build",
Expand All @@ -32,6 +31,9 @@
"python setup.py build",
"python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
],
"install_command": [
"in-dir={env_dir} python -mpip install {wheel_file} --no-deps"
],

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
Expand Down
11 changes: 2 additions & 9 deletions asv_bench/benchmarks/cohorts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import flox

from .helpers import codes_for_resampling


class Cohorts:
"""Time the core reduction function."""
Expand Down Expand Up @@ -200,15 +202,6 @@ def setup(self, *args, **kwargs):
self.expected = pd.RangeIndex(self.by.max() + 1)


def codes_for_resampling(group_as_index, freq):
s = pd.Series(np.arange(group_as_index.size), group_as_index)
grouped = s.groupby(pd.Grouper(freq=freq))
first_items = grouped.first()
counts = grouped.count()
codes = np.repeat(np.arange(len(first_items)), counts)
return codes


class PerfectBlockwiseResampling(Cohorts):
"""Perfectly chunked for blockwise resampling."""

Expand Down
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import numpy as np
import pandas as pd


def codes_for_resampling(group_as_index: pd.Index, freq: str) -> np.ndarray:
s = pd.Series(np.arange(group_as_index.size), group_as_index)
grouped = s.groupby(pd.Grouper(freq=freq))
first_items = grouped.first()
counts = grouped.count()
codes = np.repeat(np.arange(len(first_items)), counts)
return codes
21 changes: 21 additions & 0 deletions asv_bench/benchmarks/reduce.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import numpy as np
import pandas as pd
import xarray as xr
from asv_runner.benchmarks.mark import parameterize, skip_for_params

import flox
import flox.aggregations
import flox.xarray

from .helpers import codes_for_resampling

# Module-level size constant; its use sites are outside this excerpt
# (presumably the length of the benchmark inputs -- TODO confirm).
N = 3000
# Reduction names; presumably the benchmark parameter list -- TODO confirm.
funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
Expand Down Expand Up @@ -138,3 +142,20 @@ def setup(self, *args, **kwargs):
# self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
# self.axis = None
# setup_jit()


class Quantile:
    """Benchmark the ``quantile`` reduction via ``flox.xarray.xarray_reduce``."""

    def setup(self, *args, **kwargs):
        """Create the random input array and the yearly group codes."""
        days = pd.date_range("2014-01-01", "2099-12-31", freq="D")
        dim_names = ("time", "lat", "lon", "lab")
        # 31411 is the number of daily timestamps in ``days``; the leading
        # axis must match it because ``days`` is attached as the "time" coord.
        values = np.random.randn(31411, 25, 25, 1)
        self.da = xr.DataArray(
            values,
            name="pr",
            dims=dim_names,
            coords={"time": days},
        )

        # One integer code per day, grouping the days into "YE" bins.
        yearly_codes = codes_for_resampling(days, "YE")
        self.codes = xr.DataArray(data=yearly_codes, dims="time", name="time")

    def time_quantile(self):
        """Run the q=0.9 quantile of ``self.da`` grouped by ``self.codes``."""
        flox.xarray.xarray_reduce(
            self.da,
            self.codes,
            engine="flox",
            func="quantile",
            q=0.9,
        )
4 changes: 4 additions & 0 deletions readthedocs.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
version: 2

sphinx:
# Path to your Sphinx configuration file.
configuration: docs/source/conf.py

build:
os: "ubuntu-lts-latest"
tools:
Expand Down

0 comments on commit ac319cc

Please sign in to comment.