
Commit

better error messages
Filimoa committed Dec 11, 2024
1 parent 13280b0 commit b92bc92
Showing 10 changed files with 444 additions and 133 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "polars-h3"
version = "0.4.3"
version = "0.4.4"
edition = "2021"

[lib]
@@ -16,6 +16,7 @@ h3o = "0.4.0"
rayon = "1.10.0"

[profile.release]
panic = "unwind"
lto = true
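
Note on the [profile.release] change: panic = "unwind" lets a Rust panic propagate across the extension boundary so that pyo3 can surface it as a catchable Python exception (pyo3_runtime.PanicException) rather than aborting the interpreter. A rough sketch of what that looks like from the Python side; the exact call signature and the invalid-resolution trigger are illustrative assumptions, not taken from this diff:

import polars as pl
import polars_h3

df = pl.DataFrame({"lat": [37.7749], "lng": [-122.4194]})

try:
    # H3 resolutions run from 0 to 15, so 99 should hit an error path in the plugin.
    df = df.with_columns(cell=polars_h3.latlng_to_cell("lat", "lng", resolution=99))
except BaseException as exc:  # pyo3 panics may derive from BaseException rather than Exception
    print(type(exc).__name__, exc)  # a readable message instead of a hard abort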


64 changes: 52 additions & 12 deletions benchmarks/engine.py
@@ -7,10 +7,11 @@
- Attempted to also benchmark H3-Pandas, but the project appears to be abandoned and doesn't work with h3 >= 4.0.0.
"""

import json
import random
import statistics
import time
from dataclasses import asdict, dataclass, field
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Literal

import duckdb
@@ -33,8 +34,10 @@
class BenchmarkResult:
    library: Library
    name: str
    seconds: float
    avg_seconds: float
    num_rows: int
    num_iterations: int
    std_seconds: float = 0.0

    @property
    def num_rows_human(self) -> str:
@@ -46,9 +49,10 @@ def num_rows_human(self) -> str:
        return f"{self.num_rows / 1_000_000:,.0f}M"

    def __repr__(self) -> str:
        return (
            f"{self.library}::{self.name}::{self.num_rows_human} = {self.seconds:.2f}s"
        )
        if self.num_iterations == 0:
            return f"{self.library}::{self.name}::{self.num_rows_human} = {self.avg_seconds:.2f}s"
        else:
            return f"{self.library}::{self.name}::{self.num_rows_human} = {self.avg_seconds:.2f}s ± {self.std_seconds:.2f}s"


@dataclass
@@ -276,10 +280,11 @@ def run_all(
for library in libraries:
func = config["funcs"][library]

start = time.perf_counter()
perf_times = []
for _ in range(self.config.num_iterations):
start = time.perf_counter()
result_df = func(df.head(num_rows))
end = time.perf_counter()
perf_times.append(time.perf_counter() - start)

if self.config.verbose:
print(f"Library: {library}")
@@ -290,8 +295,12 @@
BenchmarkResult(
name=func_name,
library=library, # type: ignore
seconds=(end - start),
avg_seconds=statistics.mean(perf_times),
std_seconds=statistics.stdev(perf_times)
if len(perf_times) > 1
else 0,
num_rows=num_rows,
num_iterations=self.config.num_iterations,
)
)
print("done...")
@@ -644,30 +653,61 @@ def _get_grid_paths_py_h3(self, df: pl.DataFrame) -> pl.DataFrame:
)


def _pretty_print_avg_results(results: list[BenchmarkResult]):
    by_name = defaultdict(list)

    for d in results:
        by_name[d.name].append(d)

    multiples = []
    for speeds in by_name.values():
        fastest = min(v.avg_seconds for v in speeds)
        for v in speeds:
            multiples.append((v.library, v.avg_seconds / fastest))

    by_lib = defaultdict(list)
    for lib, mult in multiples:
        by_lib[lib].append(mult)

    median_by_lib = {lib: round(statistics.median(ms), 2) for lib, ms in by_lib.items()}
    avg_by_lib = {lib: round(sum(ms) / len(ms), 2) for lib, ms in by_lib.items()}

    print("\n\n======= Benchmark Final Results =======\n")
    print(f"{'Library':<10} {'Median':<8} {'Average':<8}")
    print("-" * 26)
    for lib in median_by_lib:
        print(f"{lib:<10} {median_by_lib[lib]:<8} {avg_by_lib[lib]:<8}")


if __name__ == "__main__":
    fast_factor = 1
    param_config = ParamConfig(
        resolution=9,
        grid_ring_distance=3,
        num_iterations=1,
        num_iterations=3,
        libraries="all",
        difficulty_to_num_rows={
            "basic": 10_000_000 // fast_factor,
            "medium": 10_000_000 // fast_factor,
            "complex": 100_000 // fast_factor,
        },
        functions=["latlng_to_cell"],
        # functions=["latlng_to_cell"],
        # verbose=True,
    )
    benchmark = Benchmark(config=param_config)
    results = benchmark.run_all()
    prev_func = None
    for result in results:
        if prev_func != result.name:
            print(f"\n{result.name}")
            print(f"\n{result.name} (num_iterations={param_config.num_iterations})")
            prev_func = result.name
        print(result)

    _pretty_print_avg_results(results)

    import json
    from dataclasses import asdict

    if param_config.functions == "all":
        with open("benchmarks/benchmarks-results.json", "w") as f:
            json.dump([asdict(r) for r in results], f, indent=2)
2 changes: 2 additions & 0 deletions polars_h3/__init__.py
@@ -41,6 +41,7 @@
    cell_area,
    edge_length,
    get_num_cells,
    get_pentagons,
    great_circle_distance,
)
from .core.traversal import (
@@ -97,6 +98,7 @@
"directed_edge_to_boundary",
"great_circle_distance",
"average_hexagon_area",
"get_pentagons",
"cell_area",
"edge_length",
"average_hexagon_edge_length",
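
The newly exported get_pentagons mirrors H3's getPentagons, which returns the twelve pentagon cells that exist at a given resolution. A hypothetical usage sketch; the exact expression signature and return dtype are assumptions rather than anything shown in this diff:

import polars as pl
import polars_h3

df = pl.DataFrame({"resolution": [0, 5]})

# Assumed to behave like H3's getPentagons: one list of twelve pentagon
# cell IDs per input resolution.
print(df.with_columns(pentagons=polars_h3.get_pentagons("resolution")))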
