Skip to content

Commit c5ef5a5

Browse files
authored
Merge pull request #58 from lincc-frameworks/a_few_benchmarks
add a few benchmarks
2 parents ab5fc4c + e71a2c7 commit c5ef5a5

File tree

1 file changed

+89
-1
lines changed

1 file changed

+89
-1
lines changed

benchmarks/benchmarks.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77
import pandas as pd
88
import pyarrow as pa
9-
from nested_pandas import NestedDtype
9+
from nested_pandas import NestedDtype, NestedFrame, datasets
1010

1111

1212
class AssignSingleDfToNestedSeries:
@@ -98,3 +98,91 @@ def time_run(self):
9898
def peakmem_run(self):
9999
"""Benchmark the memory usage of changing a single nested series element."""
100100
self.run()
101+
102+
103+
class NestedFrameAddNested:
    """Time and peak-memory benchmarks for ``NestedFrame.add_nested``."""

    n_base = 100
    layer_size = 1000
    # Placeholders; both are replaced by real frames in ``setup``.
    base_nf = NestedFrame
    layer_nf = NestedFrame

    def setup(self):
        """Build the base frame and the flat layer frame used by the benchmark."""
        # Fixed seed so every invocation works on the same data.
        rng = np.random.RandomState(seed=1)
        n_rows = self.layer_size * self.n_base

        # Base frame: two random columns with ``n_base`` rows each.
        col_a = rng.random(self.n_base)
        col_b = rng.random(self.n_base) * 2
        self.base_nf = NestedFrame(data={"a": col_a, "b": col_b})

        # Flat layer: ``n_rows`` rows whose "index" column cycles through
        # 0..n_base-1 and is then promoted to the frame index.
        times = rng.random(n_rows) * 20
        fluxes = rng.random(n_rows) * 100
        bands = rng.choice(["r", "g"], size=n_rows)
        row_ids = np.arange(n_rows) % self.n_base
        flat_layer = NestedFrame(
            data={
                "t": times,
                "flux": fluxes,
                "band": bands,
                "index": row_ids,
            }
        )
        self.layer_nf = flat_layer.set_index("index")

    def run(self):
        """Add the layer frame to the base frame under the name "nested"."""
        self.base_nf.add_nested(self.layer_nf, "nested")

    def time_run(self):
        """Measure how long adding a nested layer takes."""
        self.run()

    def peakmem_run(self):
        """Measure the peak memory used while adding a nested layer."""
        self.run()
139+
140+
141+
class NestedFrameReduce:
    """Time and peak-memory benchmarks for ``NestedFrame.reduce``."""

    n_base = 100
    n_nested = 1000
    # Placeholder; replaced by a generated frame in ``setup``.
    nf = NestedFrame

    def setup(self):
        """Generate the frame the benchmark operates on."""
        generated = datasets.generate_data(self.n_base, self.n_nested)
        self.nf = generated

    def run(self):
        """Apply ``np.mean`` to the "nested.flux" column via ``reduce``."""
        reducer = np.mean
        self.nf.reduce(reducer, "nested.flux")

    def time_run(self):
        """Measure how long the reduce call takes."""
        self.run()

    def peakmem_run(self):
        """Measure the peak memory used by the reduce call."""
        self.run()
163+
164+
165+
class NestedFrameQuery:
    """Benchmark the NestedFrame.query function"""

    n_base = 100
    n_nested = 1000
    # Placeholder; replaced by a generated frame in ``setup``.
    nf = NestedFrame

    def setup(self):
        """Set up the benchmark environment"""
        self.nf = datasets.generate_data(self.n_base, self.n_nested)

    def run(self):
        """Run the benchmark.

        The query result is intentionally discarded rather than written back
        to ``self.nf``: the benchmark runner may call this method several
        times after a single ``setup``, and every call must query the same
        unfiltered frame for the measurements to be comparable.
        """
        # Apply nested layer query
        self.nf.query("nested.band == 'g'")

    def time_run(self):
        """Benchmark the runtime of applying the nested layer query"""
        self.run()

    def peakmem_run(self):
        """Benchmark the memory usage of applying the nested layer query"""
        self.run()

0 commit comments

Comments
 (0)