Skip to content

Commit

Permalink
Merge pull request #58 from lincc-frameworks/a_few_benchmarks
Browse files Browse the repository at this point in the history
add a few benchmarks
  • Loading branch information
dougbrn authored May 7, 2024
2 parents ab5fc4c + e71a2c7 commit c5ef5a5
Showing 1 changed file with 89 additions and 1 deletion.
90 changes: 89 additions & 1 deletion benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import pandas as pd
import pyarrow as pa
from nested_pandas import NestedDtype
from nested_pandas import NestedDtype, NestedFrame, datasets


class AssignSingleDfToNestedSeries:
Expand Down Expand Up @@ -98,3 +98,91 @@ def time_run(self):
def peakmem_run(self):
"""Benchmark the memory usage of changing a single nested series element."""
self.run()


class NestedFrameAddNested:
    """Benchmark ``NestedFrame.add_nested``.

    A flat base frame and a much longer flat layer frame are built in
    ``setup``; ``run`` hands the layer to ``add_nested`` on the base frame.
    """

    # Number of rows in the base frame.
    n_base = 100
    # Multiplier for the layer frame: it holds ``layer_size * n_base`` rows.
    layer_size = 1000
    # Placeholders; ``setup`` replaces both with NestedFrame instances.
    base_nf = NestedFrame
    layer_nf = NestedFrame

    def setup(self):
        """Build the base and layer frames from a fixed-seed random stream."""
        # Fixed seed so every benchmark run sees identical data.
        rng = np.random.RandomState(seed=1)
        n_rows = self.layer_size * self.n_base

        # Base frame: two random float columns, ``n_base`` rows.
        col_a = rng.random(self.n_base)
        col_b = rng.random(self.n_base) * 2
        self.base_nf = NestedFrame(data={"a": col_a, "b": col_b})

        # Layer frame columns are drawn in the order t, flux, band so the
        # random stream is consumed in a fixed sequence.
        times = rng.random(n_rows) * 20
        fluxes = rng.random(n_rows) * 100
        bands = rng.choice(["r", "g"], size=n_rows)
        # Cycle through 0..n_base-1 so the layer rows are spread evenly over
        # the base index values.
        owner = np.arange(n_rows) % self.n_base

        layer = NestedFrame(
            data={"t": times, "flux": fluxes, "band": bands, "index": owner}
        )
        self.layer_nf = layer.set_index("index")

    def run(self):
        """Call ``add_nested`` with the layer frame under the name "nested"."""
        base, layer = self.base_nf, self.layer_nf
        base.add_nested(layer, "nested")

    def time_run(self):
        """Time a single ``add_nested`` call."""
        self.run()

    def peakmem_run(self):
        """Track peak memory of a single ``add_nested`` call."""
        self.run()


class NestedFrameReduce:
    """Benchmark ``NestedFrame.reduce``.

    ``setup`` obtains a frame from ``datasets.generate_data`` and ``run``
    applies ``np.mean`` to its ``"nested.flux"`` column through ``reduce``.
    """

    # Sizes forwarded, in this order, to ``datasets.generate_data``.
    n_base = 100
    n_nested = 1000
    # Placeholder; ``setup`` replaces it with the generated frame.
    nf = NestedFrame

    def setup(self):
        """Generate the frame that ``run`` operates on."""
        generated = datasets.generate_data(self.n_base, self.n_nested)
        self.nf = generated

    def run(self):
        """Reduce the ``"nested.flux"`` column with ``np.mean``."""
        frame = self.nf
        frame.reduce(np.mean, "nested.flux")

    def time_run(self):
        """Time a single ``reduce`` call."""
        self.run()

    def peakmem_run(self):
        """Track peak memory of a single ``reduce`` call."""
        self.run()


class NestedFrameQuery:
    """Benchmark ``NestedFrame.query`` on a nested-layer column.

    ``setup`` obtains a frame from ``datasets.generate_data`` and ``run``
    evaluates the query ``"nested.band == 'g'"`` against it.
    """

    # Sizes forwarded, in this order, to ``datasets.generate_data``.
    n_base = 100
    n_nested = 1000
    # Placeholder; ``setup`` replaces it with the generated frame.
    nf = NestedFrame

    def setup(self):
        """Generate the frame that ``run`` queries."""
        self.nf = datasets.generate_data(self.n_base, self.n_nested)

    def run(self):
        """Apply the nested-layer query without modifying the fixture.

        The result is deliberately discarded rather than stored back on
        ``self.nf``: a benchmark harness may call the timed routine several
        times after one ``setup``, and rebinding ``self.nf`` would make every
        call after the first query already-filtered data.
        """
        # Apply nested layer query
        self.nf.query("nested.band == 'g'")

    def time_run(self):
        """Benchmark the runtime of applying the nested-layer query"""
        self.run()

    def peakmem_run(self):
        """Benchmark the memory usage of applying the nested-layer query"""
        self.run()

0 comments on commit c5ef5a5

Please sign in to comment.