From cca0edf0ae545c6bf806f401a4863673e08c9901 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 28 Mar 2024 16:51:09 +0100 Subject: [PATCH 1/4] EXPERIMENT: [C++] Always prefer mimalloc to jemalloc --- cpp/src/arrow/memory_pool.cc | 18 ++++++++---------- dev/archery/archery/benchmark/runner.py | 2 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 2f8ce3a6fa8c7..1e855311a98ed 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -85,19 +85,17 @@ struct SupportedBackend { const std::vector& SupportedBackends() { static std::vector backends = { - // ARROW-12316: Apple => mimalloc first, then jemalloc - // non-Apple => jemalloc first, then mimalloc -#if defined(ARROW_JEMALLOC) && !defined(__APPLE__) - {"jemalloc", MemoryPoolBackend::Jemalloc}, -#endif + // mimalloc is our preferred allocator for several reasons: + // 1) it has good performance + // 2) it is well-supported on all our main platforms (Linux, macOS, Windows) + // 3) it is easy to configure and has a consistent API. 
#ifdef ARROW_MIMALLOC - {"mimalloc", MemoryPoolBackend::Mimalloc}, + {"mimalloc", MemoryPoolBackend::Mimalloc}, #endif -#if defined(ARROW_JEMALLOC) && defined(__APPLE__) - {"jemalloc", MemoryPoolBackend::Jemalloc}, +#ifdef ARROW_JEMALLOC + {"jemalloc", MemoryPoolBackend::Jemalloc}, #endif - {"system", MemoryPoolBackend::System} - }; + {"system", MemoryPoolBackend::System}}; return backends; } diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index a91989fb95257..9ebb9226e3743 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -123,6 +123,8 @@ def default_configuration(**kwargs): with_csv=True, with_dataset=True, with_json=True, + with_jemalloc=True, + with_mimalloc=True, with_parquet=True, with_python=False, with_brotli=True, From 271fb718dc5b8fea5e544e424a38b86049fdea07 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 15 Jul 2024 16:54:25 +0200 Subject: [PATCH 2/4] Relevant doc changes --- docs/source/cpp/memory.rst | 6 +++--- docs/source/python/memory.rst | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/cpp/memory.rst b/docs/source/cpp/memory.rst index 33907b5580f61..032b7d1ac90f1 100644 --- a/docs/source/cpp/memory.rst +++ b/docs/source/cpp/memory.rst @@ -139,9 +139,9 @@ Default Memory Pool The default memory pool depends on how Arrow C++ was compiled: -- if enabled at compile time, a `jemalloc <http://jemalloc.net/>`_ heap; -- otherwise, if enabled at compile time, a - `mimalloc <https://github.com/microsoft/mimalloc>`_ heap; +- if enabled at compile time, a `mimalloc <https://github.com/microsoft/mimalloc>`_ + heap; +- otherwise, if enabled at compile time, a `jemalloc <http://jemalloc.net/>`_ heap; - otherwise, the C library ``malloc`` heap. 
Overriding the Default Memory Pool diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst index 7b49d48ab20fa..029d30cc1b693 100644 --- a/docs/source/python/memory.rst +++ b/docs/source/python/memory.rst @@ -110,12 +110,12 @@ the buffer is garbage-collected, all of the memory is freed: pa.total_allocated_bytes() Besides the default built-in memory pool, there may be additional memory pools -to choose (such as `mimalloc <https://github.com/microsoft/mimalloc>`_) -from depending on how Arrow was built. One can get the backend -name for a memory pool:: +to choose from (such as `jemalloc <http://jemalloc.net/>`_) +depending on how Arrow was built. One can get the backend name for a memory +pool:: >>> pa.default_memory_pool().backend_name - 'jemalloc' + 'mimalloc' .. seealso:: :ref:`API documentation for memory pools <api.memory_pool>`. From 005a753444854ca3eb45995ad728c5a1e83e8501 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 16 Jul 2024 14:41:26 +0900 Subject: [PATCH 3/4] Add a debug log --- dev/tasks/linux-packages/github.linux.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml index 9e24835b8b627..c6fcea8865d39 100644 --- a/dev/tasks/linux-packages/github.linux.yml +++ b/dev/tasks/linux-packages/github.linux.yml @@ -65,6 +65,11 @@ jobs: set -e pushd arrow/dev/tasks/linux-packages rake version:update + for spec in */yum/*.spec.in; do + echo "::group::Log ${spec}" + cat ${spec} + echo "::endgroup::" + done rake docker:pull || : rake --trace {{ task_namespace }}:build BUILD_DIR=build popd From 9483626972635050656827cc4e9acecfc82521c7 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 16 Jul 2024 14:55:37 +0900 Subject: [PATCH 4/4] Specify ARROW_RELEASE_TIME explicitly --- dev/tasks/linux-packages/github.linux.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml index c6fcea8865d39..891682c4358d8 100644 --- 
a/dev/tasks/linux-packages/github.linux.yml +++ b/dev/tasks/linux-packages/github.linux.yml @@ -64,12 +64,7 @@ jobs: run: | set -e pushd arrow/dev/tasks/linux-packages - rake version:update - for spec in */yum/*.spec.in; do - echo "::group::Log ${spec}" - cat ${spec} - echo "::endgroup::" - done + rake version:update ARROW_RELEASE_TIME="$(date --iso-8601=seconds)" rake docker:pull || : rake --trace {{ task_namespace }}:build BUILD_DIR=build popd