diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index bbbb575..46a2db8 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from nested_pandas import NestedDtype
+from nested_pandas import NestedDtype, NestedFrame, datasets
 
 
 class AssignSingleDfToNestedSeries:
@@ -98,3 +98,104 @@ def time_run(self):
     def peakmem_run(self):
         """Benchmark the memory usage of changing a single nested series element."""
         self.run()
+
+
+class NestedFrameAddNested:
+    """Benchmark the NestedFrame.add_nested function.
+
+    ``setup`` builds a base frame with ``n_base`` rows and a flat layer frame with
+    ``n_base * layer_size`` rows; ``run`` adds the layer to the base as "nested".
+    """
+
+    n_base = 100
+    layer_size = 1000
+    # Class-level placeholders; setup() rebinds both to NestedFrame instances
+    base_nf = NestedFrame
+    layer_nf = NestedFrame
+
+    def setup(self):
+        """Set up the benchmark environment"""
+        # Fixed seed, so every run of the benchmark sees the same input data
+        randomstate = np.random.RandomState(seed=1)
+        n_layer_rows = self.layer_size * self.n_base
+
+        # Generate base data
+        base_data = {"a": randomstate.random(self.n_base), "b": randomstate.random(self.n_base) * 2}
+        self.base_nf = NestedFrame(data=base_data)
+
+        # Generate flat layer data; "index" values cycle through 0..n_base-1
+        layer_data = {
+            "t": randomstate.random(n_layer_rows) * 20,
+            "flux": randomstate.random(n_layer_rows) * 100,
+            "band": randomstate.choice(["r", "g"], size=n_layer_rows),
+            "index": np.arange(n_layer_rows) % self.n_base,
+        }
+        self.layer_nf = NestedFrame(data=layer_data).set_index("index")
+
+    def run(self):
+        """Run the benchmark."""
+        # Only the cost of the call is of interest; its result is not kept
+        self.base_nf.add_nested(self.layer_nf, "nested")
+
+    def time_run(self):
+        """Benchmark the runtime of adding a nested layer"""
+        self.run()
+
+    def peakmem_run(self):
+        """Benchmark the memory usage of adding a nested layer"""
+        self.run()
+
+
+class NestedFrameReduce:
+    """Benchmark the NestedFrame.reduce function"""
+
+    n_base = 100
+    n_nested = 1000
+    # Class-level placeholder; setup() rebinds it to the generated frame
+    nf = NestedFrame
+
+    def setup(self):
+        """Set up the benchmark environment"""
+        self.nf = datasets.generate_data(self.n_base, self.n_nested)
+
+    def run(self):
+        """Run the benchmark."""
+        # Apply np.mean to the "nested.flux" column; the result is not kept
+        self.nf.reduce(np.mean, "nested.flux")
+
+    def time_run(self):
+        """Benchmark the runtime of applying the reduce function"""
+        self.run()
+
+    def peakmem_run(self):
+        """Benchmark the memory usage of applying the reduce function"""
+        self.run()
+
+
+class NestedFrameQuery:
+    """Benchmark the NestedFrame.query function"""
+
+    n_base = 100
+    n_nested = 1000
+    # Class-level placeholder; setup() rebinds it to the generated frame
+    nf = NestedFrame
+
+    def setup(self):
+        """Set up the benchmark environment"""
+        self.nf = datasets.generate_data(self.n_base, self.n_nested)
+
+    def run(self):
+        """Run the benchmark."""
+
+        # Apply nested layer query. The result is deliberately not stored back on
+        # self.nf: setup() is not necessarily re-run before every timed call, so
+        # rebinding would make later calls query an already-filtered frame.
+        self.nf.query("nested.band == 'g'")
+
+    def time_run(self):
+        """Benchmark the runtime of applying the nested layer query"""
+        self.run()
+
+    def peakmem_run(self):
+        """Benchmark the memory usage of applying the nested layer query"""
+        self.run()