Skip to content

Commit

Permalink
More fine tuning (including some heuristics)
Browse files: browse the repository at this point in the history
  • Loading branch information
FrancescAlted committed Jan 20, 2025
1 parent 4fabd7c commit 9887a49
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions src/blosc2/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1407,10 +1407,15 @@ def compute_chunks_blocks( # noqa: C901
# Minimum blocksize calculation
min_blocksize = blocksize
if platform.machine() == "x86_64":
# For modern Intel/AMD archs, experiments say to use half of the L2 size
# min_blocksize = blosc2.cpu_info["l2_cache_size"] // 2
# For modern Intel/AMD archs, experiments say to split the cache among the operands
min_blocksize = blosc2.cpu_info["l2_cache_size"] // 4
if blosc2.cpu_info["l2_cache_size"] >= 2**21:
# Incidentally, some modern Intel CPUs have a larger L2 cache (2 MB) and they
# prefer smaller blocks. This is somewhat heuristic, but it seems to work well.
min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4
# New experiments say that using 4x the L1 size is even better
min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4
# But let's avoid this because it does not work well for AMD archs
# min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4
elif platform.system() == "Darwin" and "arm" in platform.machine():
# For Apple Silicon, experiments say we can use 4x the L1 size
min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4
Expand Down

0 comments on commit 9887a49

Please sign in to comment.