From b4f7efe5bdc2218bb595b130b4f65237caecfa76 Mon Sep 17 00:00:00 2001
From: Rok Mihevc <rok@mihevc.org>
Date: Thu, 22 Aug 2024 14:45:00 +0200
Subject: [PATCH 01/63] GH-43787: [C++] Register the new Opaque extension type
 by default (#43788)

This is to resolve #43787

> The Opaque extension type implementation for C++ (plus python bindings) was added in https://github.com/apache/arrow/pull/43458, but it was not registered by default, which we should do for canonical extension types (see https://github.com/apache/arrow/pull/43458#issuecomment-2302551404)

Additionally, this makes the `bool8` extension type build even with `ARROW_JSON=false`, as discussed [here](https://github.com/apache/arrow/commit/525881987d0b9b4f464c3e3593a9a7b4e3c767d0#r145613657)

### Rationale for this change

Canonical types should be registered by default if possible (except e.g. if they can't be compiled due to `ARROW_JSON=false`).

### What changes are included in this PR?

This adds default registration for `opaque`, changes when `bool8` is built and moves all canonical tests under the same test target.

### Are these changes tested?

Changes are tested by previously existing tests.

### Are there any user-facing changes?

`opaque` will now be registered by default and `bool8` will be present even when `ARROW_JSON=false` at build time.
* GitHub Issue: #43787

Authored-by: Rok Mihevc <rok@mihevc.org>
Signed-off-by: Rok Mihevc <rok@mihevc.org>
---
 cpp/src/arrow/CMakeLists.txt                 |  2 +-
 cpp/src/arrow/extension/CMakeLists.txt       | 18 ++++++-----------
 cpp/src/arrow/extension/bool8.h              |  2 ++
 cpp/src/arrow/extension/bool8_test.cc        |  1 -
 cpp/src/arrow/extension/fixed_shape_tensor.h |  2 ++
 cpp/src/arrow/extension/opaque.h             |  2 ++
 cpp/src/arrow/extension/opaque_test.cc       |  2 --
 cpp/src/arrow/extension_type.cc              | 21 ++++++++++++--------
 python/pyarrow/tests/test_extension_type.py  |  5 ++---
 9 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index fb7253b6fd69d..89f28ee416ede 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -374,6 +374,7 @@ set(ARROW_SRCS
     datum.cc
     device.cc
     extension_type.cc
+    extension/bool8.cc
     pretty_print.cc
     record_batch.cc
     result.cc
@@ -906,7 +907,6 @@ endif()
 
 if(ARROW_JSON)
   arrow_add_object_library(ARROW_JSON
-                           extension/bool8.cc
                            extension/fixed_shape_tensor.cc
                            extension/opaque.cc
                            json/options.cc
diff --git a/cpp/src/arrow/extension/CMakeLists.txt b/cpp/src/arrow/extension/CMakeLists.txt
index fcd5fa529ab56..5cb4bc77af2a4 100644
--- a/cpp/src/arrow/extension/CMakeLists.txt
+++ b/cpp/src/arrow/extension/CMakeLists.txt
@@ -15,22 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
 
-add_arrow_test(test
-               SOURCES
-               bool8_test.cc
-               PREFIX
-               "arrow-extension-bool8")
+set(CANONICAL_EXTENSION_TESTS bool8_test.cc)
 
-add_arrow_test(test
-               SOURCES
-               fixed_shape_tensor_test.cc
-               PREFIX
-               "arrow-fixed-shape-tensor")
+if(ARROW_JSON)
+  list(APPEND CANONICAL_EXTENSION_TESTS fixed_shape_tensor_test.cc opaque_test.cc)
+endif()
 
 add_arrow_test(test
                SOURCES
-               opaque_test.cc
+               ${CANONICAL_EXTENSION_TESTS}
                PREFIX
-               "arrow-extension-opaque")
+               "arrow-canonical-extensions")
 
 arrow_install_all_headers("arrow/extension")
diff --git a/cpp/src/arrow/extension/bool8.h b/cpp/src/arrow/extension/bool8.h
index 02e629b28a867..fbb507639e272 100644
--- a/cpp/src/arrow/extension/bool8.h
+++ b/cpp/src/arrow/extension/bool8.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "arrow/extension_type.h"
 
 namespace arrow::extension {
diff --git a/cpp/src/arrow/extension/bool8_test.cc b/cpp/src/arrow/extension/bool8_test.cc
index eabcfcf62d32c..ee77332bc3257 100644
--- a/cpp/src/arrow/extension/bool8_test.cc
+++ b/cpp/src/arrow/extension/bool8_test.cc
@@ -19,7 +19,6 @@
 #include "arrow/io/memory.h"
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
-#include "arrow/testing/extension_type.h"
 #include "arrow/testing/gtest_util.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.h b/cpp/src/arrow/extension/fixed_shape_tensor.h
index 20ec20a64c2d4..80a602021c60b 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor.h
+++ b/cpp/src/arrow/extension/fixed_shape_tensor.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "arrow/extension_type.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/extension/opaque.h b/cpp/src/arrow/extension/opaque.h
index 9814b391cbad6..5d3411798f88d 100644
--- a/cpp/src/arrow/extension/opaque.h
+++ b/cpp/src/arrow/extension/opaque.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "arrow/extension_type.h"
 #include "arrow/type.h"
 
diff --git a/cpp/src/arrow/extension/opaque_test.cc b/cpp/src/arrow/extension/opaque_test.cc
index 1629cdb39651c..16fcba3fa6bb0 100644
--- a/cpp/src/arrow/extension/opaque_test.cc
+++ b/cpp/src/arrow/extension/opaque_test.cc
@@ -25,7 +25,6 @@
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
-#include "arrow/testing/extension_type.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/checked_cast.h"
@@ -169,7 +168,6 @@ TEST(OpaqueType, MetadataRoundTrip) {
 TEST(OpaqueType, BatchRoundTrip) {
   auto type = internal::checked_pointer_cast<extension::OpaqueType>(
       extension::opaque(binary(), "geometry", "adbc.postgresql"));
-  ExtensionTypeGuard guard(type);
 
   auto storage = ArrayFromJSON(binary(), R"(["foobar", null])");
   auto array = ExtensionType::WrapArray(type, storage);
diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc
index 685018f7de7b8..83c7ebed4f319 100644
--- a/cpp/src/arrow/extension_type.cc
+++ b/cpp/src/arrow/extension_type.cc
@@ -27,9 +27,10 @@
 #include "arrow/array/util.h"
 #include "arrow/chunked_array.h"
 #include "arrow/config.h"
-#ifdef ARROW_JSON
 #include "arrow/extension/bool8.h"
+#ifdef ARROW_JSON
 #include "arrow/extension/fixed_shape_tensor.h"
+#include "arrow/extension/opaque.h"
 #endif
 #include "arrow/status.h"
 #include "arrow/type.h"
@@ -143,17 +144,21 @@ static std::once_flag registry_initialized;
 namespace internal {
 
 static void CreateGlobalRegistry() {
+  // Register canonical extension types
+
   g_registry = std::make_shared<ExtensionTypeRegistryImpl>();
+  std::vector<std::shared_ptr<DataType>> ext_types{extension::bool8()};
 
 #ifdef ARROW_JSON
-  // Register canonical extension types
-  auto fst_ext_type =
-      checked_pointer_cast<ExtensionType>(extension::fixed_shape_tensor(int64(), {}));
-  ARROW_CHECK_OK(g_registry->RegisterType(fst_ext_type));
-
-  auto bool8_ext_type = checked_pointer_cast<ExtensionType>(extension::bool8());
-  ARROW_CHECK_OK(g_registry->RegisterType(bool8_ext_type));
+  ext_types.push_back(extension::fixed_shape_tensor(int64(), {}));
+  ext_types.push_back(extension::opaque(null(), "", ""));
 #endif
+
+  // Register canonical extension types
+  for (const auto& ext_type : ext_types) {
+    ARROW_CHECK_OK(
+        g_registry->RegisterType(checked_pointer_cast<ExtensionType>(ext_type)));
+  }
 }
 
 }  // namespace internal
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index b04ee85ec99ad..0d50c467e96bd 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -1693,9 +1693,8 @@ def test_opaque_type(pickle_module, storage_type, storage):
     arr = pa.ExtensionArray.from_storage(opaque_type, storage)
     assert isinstance(arr, opaque_arr_class)
 
-    with registered_extension_type(opaque_type):
-        buf = ipc_write_batch(pa.RecordBatch.from_arrays([arr], ["ext"]))
-        batch = ipc_read_batch(buf)
+    buf = ipc_write_batch(pa.RecordBatch.from_arrays([arr], ["ext"]))
+    batch = ipc_read_batch(buf)
 
     assert batch.column(0).type.extension_name == "arrow.opaque"
     assert isinstance(batch.column(0), opaque_arr_class)

From 3e9384bbf4162ea060e867a753bce464b31e5e1c Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Thu, 22 Aug 2024 15:27:40 +0200
Subject: [PATCH 02/63] GH-43519: [Python] Set up wheel building for Python
 3.13 (#43539)

### Rationale for this change

Like #43519 mentions, now that the first `rc` is out, it's probably time to add CI coverage for Python 3.13 (and also start building wheels).

### What changes are included in this PR?

I'm fairly new to the build/CI processes of the project, but I tried to follow the same template as #37901. I'll follow up afterwards with adding CI coverage for the free-threaded build as well.
* GitHub Issue: #43519

Lead-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 .env                                               |  2 +-
 ci/docker/python-wheel-manylinux-test.dockerfile   |  7 ++++---
 ci/docker/python-wheel-manylinux.dockerfile        |  2 +-
 .../python-wheel-windows-test-vs2019.dockerfile    |  7 ++++---
 ci/docker/python-wheel-windows-vs2019.dockerfile   |  7 ++++---
 ci/scripts/install_gcs_testbench.sh                | 10 +++++++---
 ci/scripts/install_python.sh                       | 14 +++++++++++---
 ci/scripts/python_wheel_macos_build.sh             |  2 --
 dev/release/verify-release-candidate.sh            |  6 +++---
 dev/tasks/python-wheels/github.linux.yml           |  5 +++++
 dev/tasks/python-wheels/github.osx.yml             |  2 +-
 dev/tasks/tasks.yml                                |  3 ++-
 docker-compose.yml                                 |  9 ++++++---
 python/pyproject.toml                              |  1 +
 python/requirements-wheel-build.txt                |  5 +++++
 python/requirements-wheel-test.txt                 |  7 +++++++
 16 files changed, 62 insertions(+), 27 deletions(-)

diff --git a/.env b/.env
index 1358aafe824a6..21f904c3208f6 100644
--- a/.env
+++ b/.env
@@ -95,7 +95,7 @@ VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01"    # 2024.04.26 Release
 # ci/docker/python-wheel-windows-vs2019.dockerfile.
 # This is a workaround for our CI problem that "archery docker build" doesn't
 # use pulled built images in dev/tasks/python-wheels/github.windows.yml.
-PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-06-18
+PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-08-06
 
 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker-compose run --rm conan".
 # See https://github.com/conan-io/conan-docker-tools#readme and
diff --git a/ci/docker/python-wheel-manylinux-test.dockerfile b/ci/docker/python-wheel-manylinux-test.dockerfile
index cdd0ae3ced756..443ff9c53cbcb 100644
--- a/ci/docker/python-wheel-manylinux-test.dockerfile
+++ b/ci/docker/python-wheel-manylinux-test.dockerfile
@@ -16,8 +16,8 @@
 # under the License.
 
 ARG arch
-ARG python
-FROM ${arch}/python:${python}
+ARG python_image_tag
+FROM ${arch}/python:${python_image_tag}
 
 # RUN pip install --upgrade pip
 
@@ -27,4 +27,5 @@ COPY python/requirements-wheel-test.txt /arrow/python/
 RUN pip install -r /arrow/python/requirements-wheel-test.txt
 
 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
-RUN PYTHON=python /arrow/ci/scripts/install_gcs_testbench.sh default
+ARG python
+RUN PYTHON_VERSION=${python} /arrow/ci/scripts/install_gcs_testbench.sh default
diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile
index cb39667af1e10..42f088fd8a22a 100644
--- a/ci/docker/python-wheel-manylinux.dockerfile
+++ b/ci/docker/python-wheel-manylinux.dockerfile
@@ -103,7 +103,7 @@ RUN vcpkg install \
 # Configure Python for applications running in the bash shell of this Dockerfile
 ARG python=3.8
 ENV PYTHON_VERSION=${python}
-RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-*) && \
+RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}) && \
     echo "export PATH=$PYTHON_ROOT/bin:\$PATH" >> /etc/profile.d/python.sh
 
 SHELL ["/bin/bash", "-i", "-c"]
diff --git a/ci/docker/python-wheel-windows-test-vs2019.dockerfile b/ci/docker/python-wheel-windows-test-vs2019.dockerfile
index 32bbb55e82689..5f488a4c285ff 100644
--- a/ci/docker/python-wheel-windows-test-vs2019.dockerfile
+++ b/ci/docker/python-wheel-windows-test-vs2019.dockerfile
@@ -40,10 +40,11 @@ ARG python=3.8
 RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \
     (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \
     (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \
-    (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.5" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \
-    (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.0" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts")
+    (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \
+    (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \
+    (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts")
 
 # Install archiver to extract xz archives
-RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION% & \
+RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% & \
     python -m pip install --no-cache-dir -U pip setuptools & \
     choco install --no-progress -r -y archiver
diff --git a/ci/docker/python-wheel-windows-vs2019.dockerfile b/ci/docker/python-wheel-windows-vs2019.dockerfile
index ff42de939d91f..5a17e3e4c52c2 100644
--- a/ci/docker/python-wheel-windows-vs2019.dockerfile
+++ b/ci/docker/python-wheel-windows-vs2019.dockerfile
@@ -83,9 +83,10 @@ ARG python=3.8
 RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \
     (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \
     (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \
-    (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.5" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \
-    (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.0" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts")
-RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION%
+    (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \
+    (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \
+    (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts")
+RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION%
 RUN python -m pip install -U pip setuptools
 
 COPY python/requirements-wheel-build.txt arrow/python/
diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh
index 2090290c99322..5471b3cc238ca 100755
--- a/ci/scripts/install_gcs_testbench.sh
+++ b/ci/scripts/install_gcs_testbench.sh
@@ -41,8 +41,12 @@ version=$1
 if [[ "${version}" -eq "default" ]]; then
   version="v0.39.0"
   # Latests versions of Testbench require newer setuptools
-  ${PYTHON:-python3} -m pip install --upgrade setuptools
+  python3 -m pip install --upgrade setuptools
 fi
 
-${PYTHON:-python3} -m pip install \
-  "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
+# This script is run with PYTHON_VERSION undefined in some places,
+# but those only use older pythons.
+if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then
+  python3 -m pip install \
+    "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
+fi
diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh
index 5f962f02b911b..42d0e9ca179fb 100755
--- a/ci/scripts/install_python.sh
+++ b/ci/scripts/install_python.sh
@@ -28,8 +28,9 @@ declare -A versions
 versions=([3.8]=3.8.10
           [3.9]=3.9.13
           [3.10]=3.10.11
-          [3.11]=3.11.5
-          [3.12]=3.12.0)
+          [3.11]=3.11.9
+          [3.12]=3.12.4
+          [3.13]=3.13.0)
 
 if [ "$#" -ne 2 ]; then
   echo "Usage: $0 <platform> <version>"
@@ -46,7 +47,14 @@ full_version=${versions[$2]}
 if [ $platform = "macOS" ]; then
     echo "Downloading Python installer..."
 
-    if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ] || [ "$version" = "3.11" ] || [ "$version" = "3.12" ]; then
+    if [ "$version" = "3.13" ];
+    then
+        fname="python-${full_version}rc1-macos11.pkg"
+    elif [ "$(uname -m)" = "arm64" ] || \
+         [ "$version" = "3.10" ] || \
+         [ "$version" = "3.11" ] || \
+         [ "$version" = "3.12" ];
+    then
         fname="python-${full_version}-macos11.pkg"
     else
         fname="python-${full_version}-macosx10.9.pkg"
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index 3ed9d5d8dd12f..d5430f26748eb 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -48,13 +48,11 @@ fi
 
 echo "=== (${PYTHON_VERSION}) Install Python build dependencies ==="
 export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
-export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}"
 
 pip install \
   --upgrade \
   --only-binary=:all: \
   --target $PIP_SITE_PACKAGES \
-  --platform $PIP_TARGET_PLATFORM \
   -r ${source_dir}/python/requirements-wheel-build.txt
 pip install "delocate>=0.10.3"
 
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 6a36109dc2fc1..07e765a759ea0 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -1146,7 +1146,7 @@ test_linux_wheels() {
     local arch="x86_64"
   fi
 
-  local python_versions="${TEST_PYTHON_VERSIONS:-3.8 3.9 3.10 3.11 3.12}"
+  local python_versions="${TEST_PYTHON_VERSIONS:-3.8 3.9 3.10 3.11 3.12 3.13}"
   local platform_tags="${TEST_WHEEL_PLATFORM_TAGS:-manylinux_2_17_${arch}.manylinux2014_${arch} manylinux_2_28_${arch}}"
 
   for python in ${python_versions}; do
@@ -1170,11 +1170,11 @@ test_macos_wheels() {
 
   # apple silicon processor
   if [ "$(uname -m)" = "arm64" ]; then
-    local python_versions="3.8 3.9 3.10 3.11 3.12"
+    local python_versions="3.8 3.9 3.10 3.11 3.12 3.13"
     local platform_tags="macosx_11_0_arm64"
     local check_flight=OFF
   else
-    local python_versions="3.8 3.9 3.10 3.11 3.12"
+    local python_versions="3.8 3.9 3.10 3.11 3.12 3.13"
     local platform_tags="macosx_10_15_x86_64"
   fi
 
diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml
index 968c5da21897b..2854d4349fb7c 100644
--- a/dev/tasks/python-wheels/github.linux.yml
+++ b/dev/tasks/python-wheels/github.linux.yml
@@ -36,6 +36,11 @@ jobs:
       ARCHERY_USE_LEGACY_DOCKER_COMPOSE: 1
       {% endif %}
       PYTHON: "{{ python_version }}"
+      {% if python_version == "3.13" %}
+      PYTHON_IMAGE_TAG: "3.13-rc"
+      {% else %}
+      PYTHON_IMAGE_TAG: "{{ python_version }}"
+      {% endif %}
 
     steps:
       {{ macros.github_checkout_arrow()|indent }}
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index 8ceb468af89dd..b26aeba32b79b 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -121,7 +121,7 @@ jobs:
           source test-env/bin/activate
           pip install --upgrade pip wheel
           arch -{{ arch }} pip install -r arrow/python/requirements-wheel-test.txt
-          PYTHON=python arch -{{ arch }} arrow/ci/scripts/install_gcs_testbench.sh default
+          PYTHON_VERSION={{ python_version }} arch -{{ arch }} arrow/ci/scripts/install_gcs_testbench.sh default
           arch -{{ arch }} arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
 
       {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index fe02fe9ce68b2..60114d6930878 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -389,7 +389,8 @@ tasks:
                                                ("3.9", "cp39", "cp39"),
                                                ("3.10", "cp310", "cp310"),
                                                ("3.11", "cp311", "cp311"),
-                                               ("3.12", "cp312", "cp312")] %}
+                                               ("3.12", "cp312", "cp312"),
+                                               ("3.13", "cp313", "cp313")] %}
 
 {############################## Wheel Linux ##################################}
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 14eeeeee6e5ef..3045cf015bc26 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1096,9 +1096,10 @@ services:
       args:
         arch: ${ARCH}
         arch_short: ${ARCH_SHORT}
-        base: quay.io/pypa/manylinux2014_${ARCH_ALIAS}:2024-02-04-ea37246
+        base: quay.io/pypa/manylinux2014_${ARCH_ALIAS}:2024-08-03-32dfa47
         vcpkg: ${VCPKG}
         python: ${PYTHON}
+        python_image_tag: ${PYTHON_IMAGE_TAG}
         manylinux: 2014
       context: .
       dockerfile: ci/docker/python-wheel-manylinux.dockerfile
@@ -1119,9 +1120,10 @@ services:
       args:
         arch: ${ARCH}
         arch_short: ${ARCH_SHORT}
-        base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2024-02-04-ea37246
+        base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2024-08-03-32dfa47
         vcpkg: ${VCPKG}
         python: ${PYTHON}
+        python_image_tag: ${PYTHON_IMAGE_TAG}
         manylinux: 2_28
       context: .
       dockerfile: ci/docker/python-wheel-manylinux.dockerfile
@@ -1135,7 +1137,7 @@ services:
     command: /arrow/ci/scripts/python_wheel_manylinux_build.sh
 
   python-wheel-manylinux-test-imports:
-    image: ${ARCH}/python:${PYTHON}
+    image: ${ARCH}/python:${PYTHON_IMAGE_TAG}
     shm_size: 2G
     volumes:
       - .:/arrow:delegated
@@ -1151,6 +1153,7 @@ services:
       args:
         arch: ${ARCH}
         python: ${PYTHON}
+        python_image_tag: ${PYTHON_IMAGE_TAG}
       context: .
       dockerfile: ci/docker/python-wheel-manylinux-test.dockerfile
       cache_from:
diff --git a/python/pyproject.toml b/python/pyproject.toml
index d863bb3e5f0ac..8ece65dd467bb 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -48,6 +48,7 @@ classifiers  = [
     'Programming Language :: Python :: 3.10',
     'Programming Language :: Python :: 3.11',
     'Programming Language :: Python :: 3.12',
+    'Programming Language :: Python :: 3.13',
 ]
 maintainers = [
     {name = "Apache Arrow Developers", email = "dev@arrow.apache.org"}
diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt
index faa078d3d7fe7..2d448004768ce 100644
--- a/python/requirements-wheel-build.txt
+++ b/python/requirements-wheel-build.txt
@@ -1,3 +1,8 @@
+# Remove pre and extra index url once there's NumPy and Cython wheels for 3.13
+# on PyPI
+--pre
+--extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
+
 cython>=0.29.31
 oldest-supported-numpy>=0.14; python_version<'3.9'
 numpy>=2.0.0; python_version>='3.9'
diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt
index c7ff63e339575..98ec2bd4fd4e4 100644
--- a/python/requirements-wheel-test.txt
+++ b/python/requirements-wheel-test.txt
@@ -1,3 +1,9 @@
+# Remove pre and extra index url once there's NumPy and Cython wheels for 3.13
+# on PyPI
+--pre
+--prefer-binary
+--extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
+
 cffi
 cython
 hypothesis
@@ -12,5 +18,6 @@ tzdata; sys_platform == 'win32'
 numpy~=1.21.3; python_version < "3.11"
 numpy~=1.23.2; python_version == "3.11"
 numpy~=1.26.0; python_version == "3.12"
+numpy~=2.1.0; python_version >= "3.13"
 
 pandas

From 88d57cf41fde20adf14adca02e02d2cb92c83443 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 22 Aug 2024 08:45:19 -0500
Subject: [PATCH 03/63] MINOR: [CI][R] Undo #43636 now that the action is
 approved (#43730)

Undo the pinning in #43636 now that INFRA has approved the quarto-dev action

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .github/workflows/r.yml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index bf7eb99e7e990..2820d42470bca 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -86,19 +86,18 @@ jobs:
       run: |
         sudo apt-get install devscripts
 
-    # replace the SHA with v2 once INFRA-26031 is resolved
-    - uses: r-lib/actions/setup-r@732fb28088814627972f1ccbacc02561178cf391
+    - uses: r-lib/actions/setup-r@v2
       with:
         use-public-rspm: true
         install-r: false
 
-    - uses: r-lib/actions/setup-r-dependencies@732fb28088814627972f1ccbacc02561178cf391
+    - uses: r-lib/actions/setup-r-dependencies@v2
       with:
         extra-packages: any::rcmdcheck
         needs: check
         working-directory: src/r
 
-    - uses: r-lib/actions/check-r-package@732fb28088814627972f1ccbacc02561178cf391
+    - uses: r-lib/actions/check-r-package@v2
       with:
         working-directory: src/r
       env:
@@ -341,11 +340,11 @@ jobs:
           cd r/windows
           ls *.zip | xargs -n 1 unzip -uo
           rm -rf *.zip
-      - uses: r-lib/actions/setup-r@732fb28088814627972f1ccbacc02561178cf391
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.rversion }}
           Ncpus: 2
-      - uses: r-lib/actions/setup-r-dependencies@732fb28088814627972f1ccbacc02561178cf391
+      - uses: r-lib/actions/setup-r-dependencies@v2
         env:
           GITHUB_PAT: "${{ github.token }}"
         with:

From 2e33e98f583035cd686455870e9cbf5fb6dc9966 Mon Sep 17 00:00:00 2001
From: Nick Crews <nicholas.b.crews@gmail.com>
Date: Thu, 22 Aug 2024 08:26:37 -0800
Subject: [PATCH 04/63] MINOR: [GO] fixup test case name in cast_test.go
 (#43780)

---
 go/arrow/compute/cast_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/go/arrow/compute/cast_test.go b/go/arrow/compute/cast_test.go
index 2e748a2fee9c2..fa08467dd3946 100644
--- a/go/arrow/compute/cast_test.go
+++ b/go/arrow/compute/cast_test.go
@@ -2636,7 +2636,7 @@ func (c *CastSuite) TestStructToDifferentNullabilityStruct() {
 		defer dest3Nullable.Release()
 		checkCast(c.T(), srcNonNull, dest3Nullable, *compute.DefaultCastOptions(true))
 	})
-	c.Run("non-nullable to nullable", func() {
+	c.Run("nullable to non-nullable", func() {
 		fieldsSrcNullable := []arrow.Field{
 			{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true},
 			{Name: "b", Type: arrow.PrimitiveTypes.Int8, Nullable: true},

From 76e0f6254b75509d83e44fe8997bd14007907c4f Mon Sep 17 00:00:00 2001
From: Matt Topol <zotthewizard@gmail.com>
Date: Thu, 22 Aug 2024 15:37:09 -0400
Subject: [PATCH 05/63] GH-43764: [Go][FlightSQL] Add NewPreparedStatement
 function (#43781)

### Rationale for this change
Allowing creation of the prepared statement object outside of the client allows for logging, proxying, and handing off prepared statements if necessary.

### Are these changes tested?
Yes

* GitHub Issue: #43764

Authored-by: Matt Topol <zotthewizard@gmail.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
---
 go/arrow/flight/flightsql/client.go      |  9 +++++++++
 go/arrow/flight/flightsql/client_test.go | 21 +++++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go
index 4a600e5253e9b..4c9dc50135108 100644
--- a/go/arrow/flight/flightsql/client.go
+++ b/go/arrow/flight/flightsql/client.go
@@ -1102,6 +1102,15 @@ type PreparedStatement struct {
 	closed        bool
 }
 
+// NewPreparedStatement creates a prepared statement object bound to the provided
+// client using the given handle. In general, it should be sufficient to use the
+// Prepare function of a client and this wouldn't be needed. But this can be used
+// to propagate a prepared statement from one client to another if needed or if
+// proxying requests.
+func NewPreparedStatement(client *Client, handle []byte) *PreparedStatement {
+	return &PreparedStatement{client: client, handle: handle}
+}
+
 // Execute executes the prepared statement on the server and returns a FlightInfo
 // indicating where to retrieve the response. If SetParameters has been called
 // then the parameter bindings will be sent before execution.
diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go
index 7604b554cbc6c..d060161f94f0f 100644
--- a/go/arrow/flight/flightsql/client_test.go
+++ b/go/arrow/flight/flightsql/client_test.go
@@ -378,8 +378,10 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecute() {
 	createRsp := &mockDoActionClient{}
 	defer createRsp.AssertExpectations(s.T())
 	createRsp.On("Recv").Return(&pb.Result{Body: data}, nil).Once()
-	createRsp.On("Recv").Return(&pb.Result{}, io.EOF)
-	createRsp.On("CloseSend").Return(nil)
+	createRsp.On("Recv").Return(&pb.Result{}, io.EOF).Once()
+	createRsp.On("Recv").Return(&pb.Result{Body: data}, nil).Once()
+	createRsp.On("Recv").Return(&pb.Result{}, io.EOF).Once()
+	createRsp.On("CloseSend").Return(nil).Twice()
 
 	closeRsp := &mockDoActionClient{}
 	defer closeRsp.AssertExpectations(s.T())
@@ -387,13 +389,13 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecute() {
 	closeRsp.On("CloseSend").Return(nil)
 
 	s.mockClient.On("DoAction", flightsql.CreatePreparedStatementActionType, action.Body, s.callOpts).
-		Return(createRsp, nil)
+		Return(createRsp, nil).Twice()
 	s.mockClient.On("DoAction", flightsql.ClosePreparedStatementActionType, closeAct.Body, s.callOpts).
 		Return(closeRsp, nil)
 
 	infoCmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(query)}
 	desc := getDesc(infoCmd)
-	s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil)
+	s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil).Twice()
 
 	prepared, err := s.sqlClient.Prepare(context.TODO(), query, s.callOpts...)
 	s.NoError(err)
@@ -404,6 +406,17 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecute() {
 	info, err := prepared.Execute(context.TODO(), s.callOpts...)
 	s.NoError(err)
 	s.Equal(&emptyFlightInfo, info)
+
+	prepared, err = s.sqlClient.Prepare(context.TODO(), query, s.callOpts...)
+	s.NoError(err)
+
+	secondPrepare := flightsql.NewPreparedStatement(&s.sqlClient, prepared.Handle())
+	s.Equal(string(secondPrepare.Handle()), "query")
+	defer secondPrepare.Close(context.TODO(), s.callOpts...)
+
+	info, err = secondPrepare.Execute(context.TODO(), s.callOpts...)
+	s.NoError(err)
+	s.Equal(&emptyFlightInfo, info)
 }
 
 func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() {

From d47b305bbce037af18ce65dc968074fe1681b4d4 Mon Sep 17 00:00:00 2001
From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com>
Date: Thu, 22 Aug 2024 16:04:59 -0400
Subject: [PATCH 06/63] GH-43624: [Go] Add JSON/UUID extension types, extend
 arrow -> parquet logical type mapping (#43679)

### Rationale for this change

- Missing `JSON` extension type implementation.
- Current precedent in C++ (and thereby PyArrow) is that canonical extension types do not require manual registration.
- Issues like #43640 and #43624 suggest that we need to expose ways of configuring parquet types written from arrow records, but casting the underlying data presents challenges for a generalized approach.

### What changes are included in this PR?

- Move `UUIDType` from `internal` to `arrow/extensions`
- Implement `JSON` canonical extension type
- Automatically register all canonical extension types at initialization
  - Remove the register/unregister calls from the various locations where these extension types are used
- Add new `ExtensionCustomParquetType` interface (in `pqarrow`) so extension types can specify their target `LogicalType` in Parquet
- Refactor parquet `fieldToNode` to split up `PrimitiveNode` type mapping for leaves from `GroupNode` composition
- Simplify parquet `LogicalType` to use only value receivers

### Are these changes tested?

Yes

### Are there any user-facing changes?

- `UUID` and `JSON` extension types are available to end users.
- Canonical extension types will automatically be recognized in IPC without registration.
- Users with their own extension type implementations may implement the `pqarrow.ExtensionCustomParquetType` interface to control Parquet conversion without needing to fork or upstream the change.

* GitHub Issue: #43624

Authored-by: Joel Lubinitsky <joellubi@gmail.com>
Signed-off-by: Joel Lubinitsky <joellubi@gmail.com>
---
 docs/source/status.rst                        |   6 +
 go/arrow/array/array_test.go                  |   4 +-
 go/arrow/array/diff_test.go                   |   4 +-
 go/arrow/array/extension_test.go              |  10 -
 go/arrow/avro/reader_types.go                 |   4 +-
 go/arrow/avro/schema.go                       |   4 +-
 go/arrow/compute/exec/span_test.go            |   6 +-
 go/arrow/csv/reader_test.go                   |   4 +-
 go/arrow/csv/writer_test.go                   |   6 +-
 go/arrow/datatype_extension_test.go           |  18 +-
 go/arrow/extensions/bool8_test.go             |   3 -
 go/arrow/extensions/extensions.go             |  36 +++
 go/arrow/extensions/json.go                   | 148 ++++++++++
 go/arrow/extensions/json_test.go              | 268 ++++++++++++++++++
 go/arrow/extensions/opaque_test.go            |   3 -
 go/arrow/extensions/uuid.go                   | 265 +++++++++++++++++
 go/arrow/extensions/uuid_test.go              | 257 +++++++++++++++++
 .../internal/flight_integration/scenario.go   |   4 -
 .../cmd/arrow-json-integration-test/main.go   |   4 -
 go/arrow/ipc/metadata_test.go                 |  11 +-
 go/internal/types/extension_types.go          | 227 +--------------
 go/internal/types/extension_types_test.go     |  95 -------
 go/parquet/cmd/parquet_reader/main.go         |   2 +-
 go/parquet/metadata/app_version.go            |   2 +-
 go/parquet/pqarrow/encode_arrow_test.go       |  82 ++++--
 go/parquet/pqarrow/path_builder_test.go       |   6 +-
 go/parquet/pqarrow/schema.go                  | 228 +++++++--------
 go/parquet/pqarrow/schema_test.go             |  15 +-
 go/parquet/schema/converted_types.go          |   8 +-
 go/parquet/schema/logical_types.go            |  30 +-
 go/parquet/schema/logical_types_test.go       |  40 +--
 go/parquet/schema/schema_element_test.go      |   4 +-
 32 files changed, 1221 insertions(+), 583 deletions(-)
 create mode 100644 go/arrow/extensions/extensions.go
 create mode 100644 go/arrow/extensions/json.go
 create mode 100644 go/arrow/extensions/json_test.go
 create mode 100644 go/arrow/extensions/uuid.go
 create mode 100644 go/arrow/extensions/uuid_test.go
 delete mode 100644 go/internal/types/extension_types_test.go

diff --git a/docs/source/status.rst b/docs/source/status.rst
index c232aa280befb..5e2c2cc19c890 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -119,6 +119,12 @@ Data Types
 +-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | Variable shape tensor |       |       |       |            |       |       |       |       |
 +-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+| JSON                  |       |       | ✓     |            |       |       |       |       |
++-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+| UUID                  |       |       | ✓     |            |       |       |       |       |
++-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+| 8-bit Boolean         | ✓     |       | ✓     |            |       |       |       |       |
++-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 
 Notes:
 
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index 4d83766b4fa3e..4f0627c600078 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -21,9 +21,9 @@ import (
 
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/internal/testing/tools"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 	"github.com/stretchr/testify/assert"
 )
 
@@ -122,7 +122,7 @@ func TestMakeFromData(t *testing.T) {
 		{name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: &testDataType{arrow.TIMESTAMP}}, dict: array.NewData(&testDataType{arrow.TIMESTAMP}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)},
 
 		{name: "extension", d: &testDataType{arrow.EXTENSION}, expPanic: true, expError: "arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType"},
-		{name: "extension", d: types.NewUUIDType()},
+		{name: "extension", d: extensions.NewUUIDType()},
 
 		{name: "run end encoded", d: arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64), child: []arrow.ArrayData{
 			array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
diff --git a/go/arrow/array/diff_test.go b/go/arrow/array/diff_test.go
index 65d212be11838..9c9ce6a53aed0 100644
--- a/go/arrow/array/diff_test.go
+++ b/go/arrow/array/diff_test.go
@@ -25,9 +25,9 @@ import (
 
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/memory"
 	"github.com/apache/arrow/go/v18/internal/json"
-	"github.com/apache/arrow/go/v18/internal/types"
 )
 
 type diffTestCase struct {
@@ -861,7 +861,7 @@ func TestEdits_UnifiedDiff(t *testing.T) {
 		},
 		{
 			name:       "extensions",
-			dataType:   types.NewUUIDType(),
+			dataType:   extensions.NewUUIDType(),
 			baseJSON:   `["00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000001"]`,
 			targetJSON: `["00000000-0000-0000-0000-000000000001", "00000000-0000-0000-0000-000000000002"]`,
 			want: `@@ -0, +0 @@
diff --git a/go/arrow/array/extension_test.go b/go/arrow/array/extension_test.go
index 71ea9f105af7c..26245cf015dec 100644
--- a/go/arrow/array/extension_test.go
+++ b/go/arrow/array/extension_test.go
@@ -30,16 +30,6 @@ type ExtensionTypeTestSuite struct {
 	suite.Suite
 }
 
-func (e *ExtensionTypeTestSuite) SetupTest() {
-	e.NoError(arrow.RegisterExtensionType(types.NewUUIDType()))
-}
-
-func (e *ExtensionTypeTestSuite) TearDownTest() {
-	if arrow.GetExtensionType("uuid") != nil {
-		e.NoError(arrow.UnregisterExtensionType("uuid"))
-	}
-}
-
 func (e *ExtensionTypeTestSuite) TestParametricEquals() {
 	p1Type := types.NewParametric1Type(6)
 	p2Type := types.NewParametric1Type(6)
diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go
index e07cd380d511f..dab2b33dce601 100644
--- a/go/arrow/avro/reader_types.go
+++ b/go/arrow/avro/reader_types.go
@@ -27,8 +27,8 @@ import (
 	"github.com/apache/arrow/go/v18/arrow/array"
 	"github.com/apache/arrow/go/v18/arrow/decimal128"
 	"github.com/apache/arrow/go/v18/arrow/decimal256"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 )
 
 type dataLoader struct {
@@ -436,7 +436,7 @@ func mapFieldBuilders(b array.Builder, field arrow.Field, parent *fieldPos) {
 			}
 			return nil
 		}
-	case *types.UUIDBuilder:
+	case *extensions.UUIDBuilder:
 		f.appendFunc = func(data interface{}) error {
 			switch dt := data.(type) {
 			case nil:
diff --git a/go/arrow/avro/schema.go b/go/arrow/avro/schema.go
index 007dad06c19cd..a6de3718d3ccf 100644
--- a/go/arrow/avro/schema.go
+++ b/go/arrow/avro/schema.go
@@ -24,7 +24,7 @@ import (
 
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/decimal128"
-	"github.com/apache/arrow/go/v18/internal/types"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/internal/utils"
 	avro "github.com/hamba/avro/v2"
 )
@@ -349,7 +349,7 @@ func avroLogicalToArrowField(n *schemaNode) {
 		// The uuid logical type represents a random generated universally unique identifier (UUID).
 		// A uuid logical type annotates an Avro string. The string has to conform with RFC-4122
 	case "uuid":
-		dt = types.NewUUIDType()
+		dt = extensions.NewUUIDType()
 
 	// The date logical type represents a date within the calendar, with no reference to a particular
 	// time zone or time of day.
diff --git a/go/arrow/compute/exec/span_test.go b/go/arrow/compute/exec/span_test.go
index f5beb45ee1494..018fbb7d623d9 100644
--- a/go/arrow/compute/exec/span_test.go
+++ b/go/arrow/compute/exec/span_test.go
@@ -29,6 +29,7 @@ import (
 	"github.com/apache/arrow/go/v18/arrow/compute/exec"
 	"github.com/apache/arrow/go/v18/arrow/decimal128"
 	"github.com/apache/arrow/go/v18/arrow/endian"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/memory"
 	"github.com/apache/arrow/go/v18/arrow/scalar"
 	"github.com/apache/arrow/go/v18/internal/types"
@@ -192,9 +193,6 @@ func TestArraySpan_NumBuffers(t *testing.T) {
 		Children []exec.ArraySpan
 	}
 
-	arrow.RegisterExtensionType(types.NewUUIDType())
-	defer arrow.UnregisterExtensionType("uuid")
-
 	tests := []struct {
 		name   string
 		fields fields
@@ -207,7 +205,7 @@ func TestArraySpan_NumBuffers(t *testing.T) {
 		{"large binary", fields{Type: arrow.BinaryTypes.LargeBinary}, 3},
 		{"string", fields{Type: arrow.BinaryTypes.String}, 3},
 		{"large string", fields{Type: arrow.BinaryTypes.LargeString}, 3},
-		{"extension", fields{Type: types.NewUUIDType()}, 2},
+		{"extension", fields{Type: extensions.NewUUIDType()}, 2},
 		{"int32", fields{Type: arrow.PrimitiveTypes.Int32}, 2},
 	}
 	for _, tt := range tests {
diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go
index b0775b9b11a96..6a89d49704298 100644
--- a/go/arrow/csv/reader_test.go
+++ b/go/arrow/csv/reader_test.go
@@ -30,8 +30,8 @@ import (
 	"github.com/apache/arrow/go/v18/arrow/csv"
 	"github.com/apache/arrow/go/v18/arrow/decimal128"
 	"github.com/apache/arrow/go/v18/arrow/decimal256"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -356,7 +356,7 @@ func testCSVReader(t *testing.T, filepath string, withHeader bool, stringsCanBeN
 			{Name: "binary", Type: arrow.BinaryTypes.Binary},
 			{Name: "large_binary", Type: arrow.BinaryTypes.LargeBinary},
 			{Name: "fixed_size_binary", Type: &arrow.FixedSizeBinaryType{ByteWidth: 3}},
-			{Name: "uuid", Type: types.NewUUIDType()},
+			{Name: "uuid", Type: extensions.NewUUIDType()},
 			{Name: "date32", Type: arrow.PrimitiveTypes.Date32},
 			{Name: "date64", Type: arrow.PrimitiveTypes.Date64},
 		},
diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go
index be9ab961c3ef7..2ae01a6d49071 100644
--- a/go/arrow/csv/writer_test.go
+++ b/go/arrow/csv/writer_test.go
@@ -31,9 +31,9 @@ import (
 	"github.com/apache/arrow/go/v18/arrow/csv"
 	"github.com/apache/arrow/go/v18/arrow/decimal128"
 	"github.com/apache/arrow/go/v18/arrow/decimal256"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/float16"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 	"github.com/google/uuid"
 )
 
@@ -230,7 +230,7 @@ func testCSVWriter(t *testing.T, data [][]string, writeHeader bool, fmtr func(bo
 			{Name: "binary", Type: arrow.BinaryTypes.Binary},
 			{Name: "large_binary", Type: arrow.BinaryTypes.LargeBinary},
 			{Name: "fixed_size_binary", Type: &arrow.FixedSizeBinaryType{ByteWidth: 3}},
-			{Name: "uuid", Type: types.NewUUIDType()},
+			{Name: "uuid", Type: extensions.NewUUIDType()},
 			{Name: "null", Type: arrow.Null},
 		},
 		nil,
@@ -285,7 +285,7 @@ func testCSVWriter(t *testing.T, data [][]string, writeHeader bool, fmtr func(bo
 	b.Field(22).(*array.BinaryBuilder).AppendValues([][]byte{{0, 1, 2}, {3, 4, 5}, {}}, nil)
 	b.Field(23).(*array.BinaryBuilder).AppendValues([][]byte{{0, 1, 2}, {3, 4, 5}, {}}, nil)
 	b.Field(24).(*array.FixedSizeBinaryBuilder).AppendValues([][]byte{{0, 1, 2}, {3, 4, 5}, {}}, nil)
-	b.Field(25).(*types.UUIDBuilder).AppendValues([]uuid.UUID{uuid.MustParse("00000000-0000-0000-0000-000000000001"), uuid.MustParse("00000000-0000-0000-0000-000000000002"), uuid.MustParse("00000000-0000-0000-0000-000000000003")}, nil)
+	b.Field(25).(*extensions.UUIDBuilder).AppendValues([]uuid.UUID{uuid.MustParse("00000000-0000-0000-0000-000000000001"), uuid.MustParse("00000000-0000-0000-0000-000000000002"), uuid.MustParse("00000000-0000-0000-0000-000000000003")}, nil)
 	b.Field(26).(*array.NullBuilder).AppendEmptyValues(3)
 
 	for _, field := range b.Fields() {
diff --git a/go/arrow/datatype_extension_test.go b/go/arrow/datatype_extension_test.go
index c3e595f523e57..7244d377bd285 100644
--- a/go/arrow/datatype_extension_test.go
+++ b/go/arrow/datatype_extension_test.go
@@ -21,7 +21,7 @@ import (
 	"testing"
 
 	"github.com/apache/arrow/go/v18/arrow"
-	"github.com/apache/arrow/go/v18/internal/types"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/suite"
 )
@@ -50,24 +50,14 @@ type ExtensionTypeTestSuite struct {
 	suite.Suite
 }
 
-func (e *ExtensionTypeTestSuite) SetupTest() {
-	e.NoError(arrow.RegisterExtensionType(types.NewUUIDType()))
-}
-
-func (e *ExtensionTypeTestSuite) TearDownTest() {
-	if arrow.GetExtensionType("uuid") != nil {
-		e.NoError(arrow.UnregisterExtensionType("uuid"))
-	}
-}
-
 func (e *ExtensionTypeTestSuite) TestExtensionType() {
 	e.Nil(arrow.GetExtensionType("uuid-unknown"))
-	e.NotNil(arrow.GetExtensionType("uuid"))
+	e.NotNil(arrow.GetExtensionType("arrow.uuid"))
 
-	e.Error(arrow.RegisterExtensionType(types.NewUUIDType()))
+	e.Error(arrow.RegisterExtensionType(extensions.NewUUIDType()))
 	e.Error(arrow.UnregisterExtensionType("uuid-unknown"))
 
-	typ := types.NewUUIDType()
+	typ := extensions.NewUUIDType()
 	e.Implements((*arrow.ExtensionType)(nil), typ)
 	e.Equal(arrow.EXTENSION, typ.ID())
 	e.Equal("extension", typ.Name())
diff --git a/go/arrow/extensions/bool8_test.go b/go/arrow/extensions/bool8_test.go
index 9f7365d1555fb..ff129e24bc8f0 100644
--- a/go/arrow/extensions/bool8_test.go
+++ b/go/arrow/extensions/bool8_test.go
@@ -178,9 +178,6 @@ func TestReinterpretStorageEqualToValues(t *testing.T) {
 
 func TestBool8TypeBatchIPCRoundTrip(t *testing.T) {
 	typ := extensions.NewBool8Type()
-	arrow.RegisterExtensionType(typ)
-	defer arrow.UnregisterExtensionType(typ.ExtensionName())
-
 	storage, _, err := array.FromJSON(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8,
 		strings.NewReader(`[-1, 0, 1, 2, null]`))
 	require.NoError(t, err)
diff --git a/go/arrow/extensions/extensions.go b/go/arrow/extensions/extensions.go
new file mode 100644
index 0000000000000..03c6923e95f4f
--- /dev/null
+++ b/go/arrow/extensions/extensions.go
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package extensions
+
+import (
+	"github.com/apache/arrow/go/v18/arrow"
+)
+
+var canonicalExtensionTypes = []arrow.ExtensionType{
+	&Bool8Type{},
+	&UUIDType{},
+	&OpaqueType{},
+	&JSONType{},
+}
+
+func init() {
+	for _, extType := range canonicalExtensionTypes {
+		if err := arrow.RegisterExtensionType(extType); err != nil {
+			panic(err)
+		}
+	}
+}
diff --git a/go/arrow/extensions/json.go b/go/arrow/extensions/json.go
new file mode 100644
index 0000000000000..12c49f9c0a76d
--- /dev/null
+++ b/go/arrow/extensions/json.go
@@ -0,0 +1,148 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package extensions
+
+import (
+	"fmt"
+	"reflect"
+	"slices"
+
+	"github.com/apache/arrow/go/v18/arrow"
+	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/internal/json"
+	"github.com/apache/arrow/go/v18/parquet/schema"
+)
+
+var jsonSupportedStorageTypes = []arrow.DataType{
+	arrow.BinaryTypes.String,
+	arrow.BinaryTypes.LargeString,
+	arrow.BinaryTypes.StringView,
+}
+
+// JSONType represents a UTF-8 encoded JSON string as specified in RFC8259.
+type JSONType struct {
+	arrow.ExtensionBase
+}
+
+// ParquetLogicalType implements pqarrow.ExtensionCustomParquetType.
+func (b *JSONType) ParquetLogicalType() schema.LogicalType {
+	return schema.JSONLogicalType{}
+}
+
+// NewJSONType creates a new JSONType with the specified storage type.
+// storageType must be one of String, LargeString, StringView.
+func NewJSONType(storageType arrow.DataType) (*JSONType, error) {
+	if !slices.Contains(jsonSupportedStorageTypes, storageType) {
+		return nil, fmt.Errorf("unsupported storage type for JSON extension type: %s", storageType)
+	}
+	return &JSONType{ExtensionBase: arrow.ExtensionBase{Storage: storageType}}, nil
+}
+
+func (b *JSONType) ArrayType() reflect.Type { return reflect.TypeOf(JSONArray{}) }
+
+func (b *JSONType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
+	if !(data == "" || data == "{}") {
+		return nil, fmt.Errorf("serialized metadata for JSON extension type must be '' or '{}', found: %s", data)
+	}
+	return NewJSONType(storageType)
+}
+
+func (b *JSONType) ExtensionEquals(other arrow.ExtensionType) bool {
+	return b.ExtensionName() == other.ExtensionName() && arrow.TypeEqual(b.Storage, other.StorageType())
+}
+
+func (b *JSONType) ExtensionName() string { return "arrow.json" }
+
+func (b *JSONType) Serialize() string { return "" }
+
+func (b *JSONType) String() string {
+	return fmt.Sprintf("extension<%s[storage_type=%s]>", b.ExtensionName(), b.Storage)
+}
+
+// JSONArray is logically an array of UTF-8 encoded JSON strings.
+// Its values are unmarshaled to native Go values.
+type JSONArray struct {
+	array.ExtensionArrayBase
+}
+
+func (a *JSONArray) String() string {
+	b, err := a.MarshalJSON()
+	if err != nil {
+		panic(fmt.Sprintf("failed marshal JSONArray: %s", err))
+	}
+
+	return string(b)
+}
+
+func (a *JSONArray) Value(i int) any {
+	val := a.ValueBytes(i)
+
+	var res any
+	if err := json.Unmarshal(val, &res); err != nil {
+		panic(err)
+	}
+
+	return res
+}
+
+func (a *JSONArray) ValueStr(i int) string {
+	return string(a.ValueBytes(i))
+}
+
+func (a *JSONArray) ValueBytes(i int) []byte {
+	// convert to json.RawMessage, set to nil if elem isNull.
+	val := a.ValueJSON(i)
+
+	// simply returns wrapped bytes, or null if val is nil.
+	b, err := val.MarshalJSON()
+	if err != nil {
+		panic(err)
+	}
+
+	return b
+}
+
+// ValueJSON wraps the underlying string value as a json.RawMessage,
+// or returns nil if the array value is null.
+func (a *JSONArray) ValueJSON(i int) json.RawMessage {
+	var val json.RawMessage
+	if a.IsValid(i) {
+		val = json.RawMessage(a.Storage().(array.StringLike).Value(i))
+	}
+	return val
+}
+
+// MarshalJSON implements json.Marshaler.
+// Marshaling json.RawMessage is a no-op, except that nil values will
+// be marshaled as a JSON null.
+func (a *JSONArray) MarshalJSON() ([]byte, error) {
+	values := make([]json.RawMessage, a.Len())
+	for i := 0; i < a.Len(); i++ {
+		values[i] = a.ValueJSON(i)
+	}
+	return json.Marshal(values)
+}
+
+// GetOneForMarshal implements arrow.Array.
+func (a *JSONArray) GetOneForMarshal(i int) interface{} {
+	return a.ValueJSON(i)
+}
+
+var (
+	_ arrow.ExtensionType  = (*JSONType)(nil)
+	_ array.ExtensionArray = (*JSONArray)(nil)
+)
diff --git a/go/arrow/extensions/json_test.go b/go/arrow/extensions/json_test.go
new file mode 100644
index 0000000000000..21acc58f93949
--- /dev/null
+++ b/go/arrow/extensions/json_test.go
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package extensions_test
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/apache/arrow/go/v18/arrow"
+	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
+	"github.com/apache/arrow/go/v18/arrow/ipc"
+	"github.com/apache/arrow/go/v18/arrow/memory"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestJSONTypeBasics(t *testing.T) {
+	typ, err := extensions.NewJSONType(arrow.BinaryTypes.String)
+	require.NoError(t, err)
+
+	typLarge, err := extensions.NewJSONType(arrow.BinaryTypes.LargeString)
+	require.NoError(t, err)
+
+	typView, err := extensions.NewJSONType(arrow.BinaryTypes.StringView)
+	require.NoError(t, err)
+
+	assert.Equal(t, "arrow.json", typ.ExtensionName())
+	assert.Equal(t, "arrow.json", typLarge.ExtensionName())
+	assert.Equal(t, "arrow.json", typView.ExtensionName())
+
+	assert.True(t, typ.ExtensionEquals(typ))
+	assert.True(t, typLarge.ExtensionEquals(typLarge))
+	assert.True(t, typView.ExtensionEquals(typView))
+
+	assert.False(t, arrow.TypeEqual(arrow.BinaryTypes.String, typ))
+	assert.False(t, arrow.TypeEqual(typ, typLarge))
+	assert.False(t, arrow.TypeEqual(typ, typView))
+	assert.False(t, arrow.TypeEqual(typLarge, typView))
+
+	assert.True(t, arrow.TypeEqual(arrow.BinaryTypes.String, typ.StorageType()))
+	assert.True(t, arrow.TypeEqual(arrow.BinaryTypes.LargeString, typLarge.StorageType()))
+	assert.True(t, arrow.TypeEqual(arrow.BinaryTypes.StringView, typView.StorageType()))
+
+	assert.Equal(t, "extension<arrow.json[storage_type=utf8]>", typ.String())
+	assert.Equal(t, "extension<arrow.json[storage_type=large_utf8]>", typLarge.String())
+	assert.Equal(t, "extension<arrow.json[storage_type=string_view]>", typView.String())
+}
+
+var jsonTestCases = []struct {
+	Name           string
+	StorageType    arrow.DataType
+	StorageBuilder func(mem memory.Allocator) array.Builder
+}{
+	{
+		Name:           "string",
+		StorageType:    arrow.BinaryTypes.String,
+		StorageBuilder: func(mem memory.Allocator) array.Builder { return array.NewStringBuilder(mem) },
+	},
+	{
+		Name:           "large_string",
+		StorageType:    arrow.BinaryTypes.LargeString,
+		StorageBuilder: func(mem memory.Allocator) array.Builder { return array.NewLargeStringBuilder(mem) },
+	},
+	{
+		Name:           "string_view",
+		StorageType:    arrow.BinaryTypes.StringView,
+		StorageBuilder: func(mem memory.Allocator) array.Builder { return array.NewStringViewBuilder(mem) },
+	},
+}
+
+func TestJSONTypeCreateFromArray(t *testing.T) {
+	for _, tc := range jsonTestCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			typ, err := extensions.NewJSONType(tc.StorageType)
+			require.NoError(t, err)
+
+			bldr := tc.StorageBuilder(memory.DefaultAllocator)
+			defer bldr.Release()
+
+			bldr.AppendValueFromString(`"foobar"`)
+			bldr.AppendNull()
+			bldr.AppendValueFromString(`{"foo": "bar"}`)
+			bldr.AppendValueFromString(`42`)
+			bldr.AppendValueFromString(`true`)
+			bldr.AppendValueFromString(`[1, true, "3", null, {"five": 5}]`)
+
+			storage := bldr.NewArray()
+			defer storage.Release()
+
+			arr := array.NewExtensionArrayWithStorage(typ, storage)
+			defer arr.Release()
+
+			assert.Equal(t, 6, arr.Len())
+			assert.Equal(t, 1, arr.NullN())
+
+			jsonArr, ok := arr.(*extensions.JSONArray)
+			require.True(t, ok)
+
+			require.Equal(t, "foobar", jsonArr.Value(0))
+			require.Equal(t, nil, jsonArr.Value(1))
+			require.Equal(t, map[string]any{"foo": "bar"}, jsonArr.Value(2))
+			require.Equal(t, float64(42), jsonArr.Value(3))
+			require.Equal(t, true, jsonArr.Value(4))
+			require.Equal(t, []any{float64(1), true, "3", nil, map[string]any{"five": float64(5)}}, jsonArr.Value(5))
+		})
+	}
+}
+
+func TestJSONTypeBatchIPCRoundTrip(t *testing.T) {
+	for _, tc := range jsonTestCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			typ, err := extensions.NewJSONType(tc.StorageType)
+			require.NoError(t, err)
+
+			bldr := tc.StorageBuilder(memory.DefaultAllocator)
+			defer bldr.Release()
+
+			bldr.AppendValueFromString(`"foobar"`)
+			bldr.AppendNull()
+			bldr.AppendValueFromString(`{"foo": "bar"}`)
+			bldr.AppendValueFromString(`42`)
+			bldr.AppendValueFromString(`true`)
+			bldr.AppendValueFromString(`[1, true, "3", null, {"five": 5}]`)
+
+			storage := bldr.NewArray()
+			defer storage.Release()
+
+			arr := array.NewExtensionArrayWithStorage(typ, storage)
+			defer arr.Release()
+
+			batch := array.NewRecord(arrow.NewSchema([]arrow.Field{{Name: "field", Type: typ, Nullable: true}}, nil),
+				[]arrow.Array{arr}, -1)
+			defer batch.Release()
+
+			var written arrow.Record
+			{
+				var buf bytes.Buffer
+				wr := ipc.NewWriter(&buf, ipc.WithSchema(batch.Schema()))
+				require.NoError(t, wr.Write(batch))
+				require.NoError(t, wr.Close())
+
+				rdr, err := ipc.NewReader(&buf)
+				require.NoError(t, err)
+				written, err = rdr.Read()
+				require.NoError(t, err)
+				written.Retain()
+				defer written.Release()
+				rdr.Release()
+			}
+
+			assert.Truef(t, batch.Schema().Equal(written.Schema()), "expected: %s, got: %s",
+				batch.Schema(), written.Schema())
+
+			assert.Truef(t, array.RecordEqual(batch, written), "expected: %s, got: %s",
+				batch, written)
+		})
+	}
+}
+
+func TestMarshallJSONArray(t *testing.T) {
+	for _, tc := range jsonTestCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			typ, err := extensions.NewJSONType(tc.StorageType)
+			require.NoError(t, err)
+
+			bldr := tc.StorageBuilder(memory.DefaultAllocator)
+			defer bldr.Release()
+
+			bldr.AppendValueFromString(`"foobar"`)
+			bldr.AppendNull()
+			bldr.AppendValueFromString(`{"foo": "bar"}`)
+			bldr.AppendValueFromString(`42`)
+			bldr.AppendValueFromString(`true`)
+			bldr.AppendValueFromString(`[1, true, "3", null, {"five": 5}]`)
+
+			storage := bldr.NewArray()
+			defer storage.Release()
+
+			arr := array.NewExtensionArrayWithStorage(typ, storage)
+			defer arr.Release()
+
+			assert.Equal(t, 6, arr.Len())
+			assert.Equal(t, 1, arr.NullN())
+
+			jsonArr, ok := arr.(*extensions.JSONArray)
+			require.True(t, ok)
+
+			b, err := jsonArr.MarshalJSON()
+			require.NoError(t, err)
+
+			expectedJSON := `["foobar",null,{"foo":"bar"},42,true,[1,true,"3",null,{"five":5}]]`
+			require.Equal(t, expectedJSON, string(b))
+			require.Equal(t, expectedJSON, jsonArr.String())
+		})
+	}
+}
+
+func TestJSONRecordToJSON(t *testing.T) {
+	for _, tc := range jsonTestCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			typ, err := extensions.NewJSONType(tc.StorageType)
+			require.NoError(t, err)
+
+			bldr := tc.StorageBuilder(memory.DefaultAllocator)
+			defer bldr.Release()
+
+			bldr.AppendValueFromString(`"foobar"`)
+			bldr.AppendNull()
+			bldr.AppendValueFromString(`{"foo": "bar"}`)
+			bldr.AppendValueFromString(`42`)
+			bldr.AppendValueFromString(`true`)
+			bldr.AppendValueFromString(`[1, true, "3", null, {"five": 5}]`)
+
+			storage := bldr.NewArray()
+			defer storage.Release()
+
+			arr := array.NewExtensionArrayWithStorage(typ, storage)
+			defer arr.Release()
+
+			assert.Equal(t, 6, arr.Len())
+			assert.Equal(t, 1, arr.NullN())
+
+			jsonArr, ok := arr.(*extensions.JSONArray)
+			require.True(t, ok)
+
+			rec := array.NewRecord(arrow.NewSchema([]arrow.Field{{Name: "json", Type: typ, Nullable: true}}, nil), []arrow.Array{jsonArr}, 6)
+			defer rec.Release()
+
+			buf := bytes.NewBuffer([]byte("\n")) // expected output has leading newline for clearer formatting
+			require.NoError(t, array.RecordToJSON(rec, buf))
+
+			expectedJSON := `
+				{"json":"foobar"}
+				{"json":null}
+				{"json":{"foo":"bar"}}
+				{"json":42}
+				{"json":true}
+				{"json":[1,true,"3",null,{"five":5}]}
+			`
+
+			expectedJSONLines := strings.Split(expectedJSON, "\n")
+			actualJSONLines := strings.Split(buf.String(), "\n")
+
+			require.Equal(t, len(expectedJSONLines), len(actualJSONLines))
+			for i := range expectedJSONLines {
+				if strings.TrimSpace(expectedJSONLines[i]) != "" {
+					require.JSONEq(t, expectedJSONLines[i], actualJSONLines[i])
+				}
+			}
+		})
+	}
+}
diff --git a/go/arrow/extensions/opaque_test.go b/go/arrow/extensions/opaque_test.go
index b6686e97bc027..a0fc8962ce5e4 100644
--- a/go/arrow/extensions/opaque_test.go
+++ b/go/arrow/extensions/opaque_test.go
@@ -161,9 +161,6 @@ func TestOpaqueTypeMetadataRoundTrip(t *testing.T) {
 
 func TestOpaqueTypeBatchRoundTrip(t *testing.T) {
 	typ := extensions.NewOpaqueType(arrow.BinaryTypes.String, "geometry", "adbc.postgresql")
-	arrow.RegisterExtensionType(typ)
-	defer arrow.UnregisterExtensionType(typ.ExtensionName())
-
 	storage, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String,
 		strings.NewReader(`["foobar", null]`))
 	require.NoError(t, err)
diff --git a/go/arrow/extensions/uuid.go b/go/arrow/extensions/uuid.go
new file mode 100644
index 0000000000000..422b9ea118800
--- /dev/null
+++ b/go/arrow/extensions/uuid.go
@@ -0,0 +1,308 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package extensions
+
+import (
+	"bytes"
+	"fmt"
+	"reflect"
+	"strings"
+
+	"github.com/apache/arrow/go/v18/arrow"
+	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/arrow/memory"
+	"github.com/apache/arrow/go/v18/internal/json"
+	"github.com/apache/arrow/go/v18/parquet/schema"
+	"github.com/google/uuid"
+)
+
+// UUIDBuilder is an array builder for the UUID extension type. It embeds an
+// array.ExtensionBuilder and adds typed append helpers on top of it.
+type UUIDBuilder struct {
+	*array.ExtensionBuilder
+}
+
+// NewUUIDBuilder creates a new UUIDBuilder, exposing a convenient and efficient interface
+// for writing uuid.UUID (or [16]byte) values to the underlying FixedSizeBinary storage array.
+func NewUUIDBuilder(mem memory.Allocator) *UUIDBuilder {
+	return &UUIDBuilder{ExtensionBuilder: array.NewExtensionBuilder(mem, NewUUIDType())}
+}
+
+// Append appends a single UUID value to the builder.
+func (b *UUIDBuilder) Append(v uuid.UUID) {
+	b.AppendBytes(v)
+}
+
+// AppendBytes appends a single 16-byte value by writing it to the underlying
+// FixedSizeBinaryBuilder.
+func (b *UUIDBuilder) AppendBytes(v [16]byte) {
+	b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).Append(v[:])
+}
+
+// UnsafeAppend forwards v to the underlying FixedSizeBinaryBuilder's
+// UnsafeAppend; see that method for the preconditions it places on callers.
+func (b *UUIDBuilder) UnsafeAppend(v uuid.UUID) {
+	b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).UnsafeAppend(v[:])
+}
+
+// AppendValueFromString appends a null when s equals array.NullValueStr;
+// otherwise it parses s with uuid.Parse and appends the result, returning the
+// parse error if s is not a valid UUID string.
+func (b *UUIDBuilder) AppendValueFromString(s string) error {
+	if s == array.NullValueStr {
+		b.AppendNull()
+		return nil
+	}
+
+	uid, err := uuid.Parse(s)
+	if err != nil {
+		return err
+	}
+
+	b.Append(uid)
+	return nil
+}
+
+// AppendValues appends all of v in one call. valid must either be empty or
+// have the same length as v (otherwise AppendValues panics); entries whose
+// valid flag is false are passed to the storage builder as nil slices.
+func (b *UUIDBuilder) AppendValues(v []uuid.UUID, valid []bool) {
+	if len(v) != len(valid) && len(valid) != 0 {
+		panic("len(v) != len(valid) && len(valid) != 0")
+	}
+
+	data := make([][]byte, len(v))
+	for i := range v {
+		if len(valid) > 0 && !valid[i] {
+			continue
+		}
+		data[i] = v[i][:]
+	}
+	b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).AppendValues(data, valid)
+}
+
+// UnmarshalOne reads the next token from dec and appends it: a JSON string
+// (or []byte token) is parsed as a UUID, a JSON null appends a null, and any
+// other token yields a *json.UnmarshalTypeError.
+func (b *UUIDBuilder) UnmarshalOne(dec *json.Decoder) error {
+	t, err := dec.Token()
+	if err != nil {
+		return err
+	}
+
+	var val uuid.UUID
+	switch v := t.(type) {
+	case string:
+		val, err = uuid.Parse(v)
+		if err != nil {
+			return err
+		}
+	case []byte:
+		val, err = uuid.ParseBytes(v)
+		if err != nil {
+			return err
+		}
+	case nil:
+		b.AppendNull()
+		return nil
+	default:
+		return &json.UnmarshalTypeError{
+			Value:  fmt.Sprint(t),
+			Type:   reflect.TypeOf([]byte{}),
+			Offset: dec.InputOffset(),
+			Struct: fmt.Sprintf("FixedSizeBinary[%d]", 16),
+		}
+	}
+
+	b.Append(val)
+	return nil
+}
+
+// Unmarshal appends values from dec by calling UnmarshalOne repeatedly until
+// dec.More returns false, stopping early at the first error.
+func (b *UUIDBuilder) Unmarshal(dec *json.Decoder) error {
+	for dec.More() {
+		if err := b.UnmarshalOne(dec); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// UnmarshalJSON appends the elements of a JSON array to the builder. It
+// returns an error if the first token of data is not the '[' delimiter.
+func (b *UUIDBuilder) UnmarshalJSON(data []byte) error {
+	dec := json.NewDecoder(bytes.NewReader(data))
+	t, err := dec.Token()
+	if err != nil {
+		return err
+	}
+
+	// Report the token that was actually read: when t is not a json.Delim the
+	// asserted value is the zero Delim, not the offending input.
+	if delim, ok := t.(json.Delim); !ok || delim != '[' {
+		return fmt.Errorf("uuid builder must unpack from json array, found %v", t)
+	}
+
+	return b.Unmarshal(dec)
+}
+
+// UUIDArray is a simple array which is a FixedSizeBinary(16)
+type UUIDArray struct {
+	array.ExtensionArrayBase
+}
+
+// String renders the array as a space-separated, bracketed list in which
+// valid values are double-quoted and nulls print as array.NullValueStr.
+func (a *UUIDArray) String() string {
+	arr := a.Storage().(*array.FixedSizeBinary)
+	o := new(strings.Builder)
+	o.WriteString("[")
+	for i := 0; i < arr.Len(); i++ {
+		if i > 0 {
+			o.WriteString(" ")
+		}
+		switch {
+		case a.IsNull(i):
+			o.WriteString(array.NullValueStr)
+		default:
+			fmt.Fprintf(o, "%q", a.Value(i))
+		}
+	}
+	o.WriteString("]")
+	return o.String()
+}
+
+// Value returns the UUID at index i, or uuid.Nil if that slot is null.
+func (a *UUIDArray) Value(i int) uuid.UUID {
+	if a.IsNull(i) {
+		return uuid.Nil
+	}
+	return uuid.Must(uuid.FromBytes(a.Storage().(*array.FixedSizeBinary).Value(i)))
+}
+
+// Values returns every element of the array as a slice; null slots are
+// represented by uuid.Nil (see Value).
+func (a *UUIDArray) Values() []uuid.UUID {
+	values := make([]uuid.UUID, a.Len())
+	for i := range values {
+		values[i] = a.Value(i)
+	}
+	return values
+}
+
+// ValueStr returns the string form of the UUID at index i, or
+// array.NullValueStr if that slot is null.
+func (a *UUIDArray) ValueStr(i int) string {
+	switch {
+	case a.IsNull(i):
+		return array.NullValueStr
+	default:
+		return a.Value(i).String()
+	}
+}
+
+// MarshalJSON encodes the array as a JSON array whose elements are produced
+// by GetOneForMarshal.
+func (a *UUIDArray) MarshalJSON() ([]byte, error) {
+	vals := make([]any, a.Len())
+	for i := range vals {
+		vals[i] = a.GetOneForMarshal(i)
+	}
+	return json.Marshal(vals)
+}
+
+// GetOneForMarshal returns the uuid.UUID at index i, or nil if the slot is
+// null, for use when marshalling the array to JSON.
+func (a *UUIDArray) GetOneForMarshal(i int) interface{} {
+	if a.IsValid(i) {
+		return a.Value(i)
+	}
+	return nil
+}
+
+// UUIDType is a simple extension type that represents a FixedSizeBinary(16)
+// to be used for representing UUIDs
+type UUIDType struct {
+	arrow.ExtensionBase
+}
+
+// ParquetLogicalType implements pqarrow.ExtensionCustomParquetType.
+func (e *UUIDType) ParquetLogicalType() schema.LogicalType {
+	return schema.UUIDLogicalType{}
+}
+
+// NewUUIDType is a convenience function to create an instance of UUIDType
+// with the correct storage type
+func NewUUIDType() *UUIDType {
+	return &UUIDType{ExtensionBase: arrow.ExtensionBase{Storage: &arrow.FixedSizeBinaryType{ByteWidth: 16}}}
+}
+
+// ArrayType returns TypeOf(UUIDArray{}) for constructing UUID arrays
+func (*UUIDType) ArrayType() reflect.Type {
+	return reflect.TypeOf(UUIDArray{})
+}
+
+// ExtensionName returns "arrow.uuid".
+func (*UUIDType) ExtensionName() string {
+	return "arrow.uuid"
+}
+
+// String returns "extension<arrow.uuid>".
+func (e *UUIDType) String() string {
+	return fmt.Sprintf("extension<%s>", e.ExtensionName())
+}
+
+// MarshalJSON encodes the type as {"name":...,"metadata":...}. The metadata
+// is written with %q so that the empty string returned by Serialize becomes
+// a JSON string literal ("") rather than leaving the value out entirely,
+// which would make the object invalid JSON.
+func (e *UUIDType) MarshalJSON() ([]byte, error) {
+	return []byte(fmt.Sprintf(`{"name":"%s","metadata":%q}`, e.ExtensionName(), e.Serialize())), nil
+}
+
+// Serialize returns the type's extension metadata, which is always empty.
+func (*UUIDType) Serialize() string {
+	return ""
+}
+
+// Deserialize expects storageType to be FixedSizeBinaryType{ByteWidth: 16}
+// and returns an error otherwise; data is not inspected.
+func (*UUIDType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
+	if !arrow.TypeEqual(storageType, &arrow.FixedSizeBinaryType{ByteWidth: 16}) {
+		return nil, fmt.Errorf("invalid storage type for UUIDType: %s", storageType.Name())
+	}
+	return NewUUIDType(), nil
+}
+
+// ExtensionEquals returns true if both extensions have the same name
+func (e *UUIDType) ExtensionEquals(other arrow.ExtensionType) bool {
+	return e.ExtensionName() == other.ExtensionName()
+}
+
+// NewBuilder returns the builder created by NewUUIDBuilder(mem).
+func (*UUIDType) NewBuilder(mem memory.Allocator) array.Builder {
+	return NewUUIDBuilder(mem)
+}
+
+// Compile-time checks that the UUID types satisfy the expected interfaces.
+var (
+	_ arrow.ExtensionType          = (*UUIDType)(nil)
+	_ array.CustomExtensionBuilder = (*UUIDType)(nil)
+	_ array.ExtensionArray         = (*UUIDArray)(nil)
+	_ array.Builder                = (*UUIDBuilder)(nil)
+)
diff --git a/go/arrow/extensions/uuid_test.go b/go/arrow/extensions/uuid_test.go
new file mode 100644
index 0000000000000..80c621db2a0d5
--- /dev/null
+++ b/go/arrow/extensions/uuid_test.go
@@ -0,0 +1,280 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package extensions_test
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/apache/arrow/go/v18/arrow"
+	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
+	"github.com/apache/arrow/go/v18/arrow/ipc"
+	"github.com/apache/arrow/go/v18/arrow/memory"
+	"github.com/apache/arrow/go/v18/internal/json"
+	"github.com/google/uuid"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// testUUID is a random UUID, generated once per test run and shared by the tests in this file.
+var testUUID = uuid.New()
+
+// TestUUIDExtensionBuilder builds a (value, null, value) array, checks its
+// String form, and round-trips it through JSON via array.FromJSON and json.Unmarshal.
+func TestUUIDExtensionBuilder(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+	builder := extensions.NewUUIDBuilder(mem)
+	defer builder.Release()
+	builder.Append(testUUID)
+	builder.AppendNull()
+	builder.AppendBytes(testUUID)
+	arr := builder.NewArray()
+	defer arr.Release()
+	arrStr := arr.String()
+	assert.Equal(t, fmt.Sprintf(`["%[1]s" (null) "%[1]s"]`, testUUID), arrStr)
+	jsonStr, err := json.Marshal(arr)
+	require.NoError(t, err)
+
+	arr1, _, err := array.FromJSON(mem, extensions.NewUUIDType(), bytes.NewReader(jsonStr))
+	// Check the error before deferring Release: arr1 may be nil when FromJSON
+	// fails, and calling Release on it would panic and mask the real failure.
+	require.NoError(t, err)
+	defer arr1.Release()
+	assert.True(t, array.Equal(arr1, arr))
+
+	require.NoError(t, json.Unmarshal(jsonStr, builder))
+	arr2 := builder.NewArray()
+	defer arr2.Release()
+	assert.True(t, array.Equal(arr2, arr))
+}
+
+// TestUUIDExtensionRecordBuilder builds a record with one UUID column via
+// array.RecordBuilder and round-trips it through MarshalJSON / array.RecordFromJSON.
+func TestUUIDExtensionRecordBuilder(t *testing.T) {
+	schema := arrow.NewSchema([]arrow.Field{
+		{Name: "uuid", Type: extensions.NewUUIDType()},
+	}, nil)
+	builder := array.NewRecordBuilder(memory.DefaultAllocator, schema)
+	defer builder.Release()
+	builder.Field(0).(*extensions.UUIDBuilder).Append(testUUID)
+	builder.Field(0).(*extensions.UUIDBuilder).AppendNull()
+	builder.Field(0).(*extensions.UUIDBuilder).Append(testUUID)
+	record := builder.NewRecord()
+	defer record.Release()
+	b, err := record.MarshalJSON()
+	require.NoError(t, err)
+	require.Equal(t, "[{\"uuid\":\""+testUUID.String()+"\"}\n,{\"uuid\":null}\n,{\"uuid\":\""+testUUID.String()+"\"}\n]", string(b))
+	record1, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, bytes.NewReader(b))
+	require.NoError(t, err)
+	defer record1.Release()
+	require.Equal(t, record, record1)
+}
+
+// TestUUIDStringRoundTrip checks that feeding each ValueStr back through
+// AppendValueFromString reproduces the original array, nulls included.
+func TestUUIDStringRoundTrip(t *testing.T) {
+	// 1. create array
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	b := extensions.NewUUIDBuilder(mem)
+	defer b.Release()
+	b.Append(uuid.Nil)
+	b.AppendNull()
+	b.Append(uuid.NameSpaceURL)
+	b.AppendNull()
+	b.Append(testUUID)
+
+	arr := b.NewArray()
+	defer arr.Release()
+
+	// 2. create array via AppendValueFromString
+	b1 := extensions.NewUUIDBuilder(mem)
+	defer b1.Release()
+
+	for i := 0; i < arr.Len(); i++ {
+		assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i)))
+	}
+
+	arr1 := b1.NewArray()
+	defer arr1.Release()
+
+	assert.True(t, array.Equal(arr, arr1))
+}
+
+// TestUUIDTypeBasics checks the extension name, equality and String form of UUIDType.
+func TestUUIDTypeBasics(t *testing.T) {
+	typ := extensions.NewUUIDType()
+
+	assert.Equal(t, "arrow.uuid", typ.ExtensionName())
+	assert.True(t, typ.ExtensionEquals(typ))
+
+	assert.True(t, arrow.TypeEqual(typ, typ))
+	assert.False(t, arrow.TypeEqual(&arrow.FixedSizeBinaryType{ByteWidth: 16}, typ))
+	assert.True(t, arrow.TypeEqual(&arrow.FixedSizeBinaryType{ByteWidth: 16}, typ.StorageType()))
+
+	assert.Equal(t, "extension<arrow.uuid>", typ.String())
+}
+
+// TestUUIDTypeCreateFromArray wraps a FixedSizeBinary(16) storage array in the
+// UUID extension type and checks the resulting UUIDArray's length, nulls and values.
+func TestUUIDTypeCreateFromArray(t *testing.T) {
+	typ := extensions.NewUUIDType()
+
+	bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 16})
+	defer bldr.Release()
+
+	bldr.Append(testUUID[:])
+	bldr.AppendNull()
+	bldr.Append(testUUID[:])
+
+	storage := bldr.NewArray()
+	defer storage.Release()
+
+	arr := array.NewExtensionArrayWithStorage(typ, storage)
+	defer arr.Release()
+
+	assert.Equal(t, 3, arr.Len())
+	assert.Equal(t, 1, arr.NullN())
+
+	uuidArr, ok := arr.(*extensions.UUIDArray)
+	require.True(t, ok)
+
+	require.Equal(t, testUUID, uuidArr.Value(0))
+	require.Equal(t, uuid.Nil, uuidArr.Value(1))
+	require.Equal(t, testUUID, uuidArr.Value(2))
+}
+
+// TestUUIDTypeBatchIPCRoundTrip writes a record with a UUID column through the
+// IPC writer and reads it back, expecting an equal schema and record.
+func TestUUIDTypeBatchIPCRoundTrip(t *testing.T) {
+	typ := extensions.NewUUIDType()
+
+	bldr := extensions.NewUUIDBuilder(memory.DefaultAllocator)
+	defer bldr.Release()
+
+	bldr.Append(testUUID)
+	bldr.AppendNull()
+	bldr.AppendBytes(testUUID)
+
+	arr := bldr.NewArray()
+	defer arr.Release()
+
+	batch := array.NewRecord(arrow.NewSchema([]arrow.Field{{Name: "field", Type: typ, Nullable: true}}, nil),
+		[]arrow.Array{arr}, -1)
+	defer batch.Release()
+
+	var written arrow.Record
+	{
+		var buf bytes.Buffer
+		wr := ipc.NewWriter(&buf, ipc.WithSchema(batch.Schema()))
+		require.NoError(t, wr.Write(batch))
+		require.NoError(t, wr.Close())
+
+		rdr, err := ipc.NewReader(&buf)
+		require.NoError(t, err)
+		written, err = rdr.Read()
+		require.NoError(t, err)
+		written.Retain()
+		defer written.Release()
+		rdr.Release()
+	}
+
+	assert.Truef(t, batch.Schema().Equal(written.Schema()), "expected: %s, got: %s",
+		batch.Schema(), written.Schema())
+
+	assert.Truef(t, array.RecordEqual(batch, written), "expected: %s, got: %s",
+		batch, written)
+}
+
+// TestMarshallUUIDArray checks that UUIDArray.MarshalJSON emits valid values
+// as UUID strings and null slots as JSON null.
+func TestMarshallUUIDArray(t *testing.T) {
+	bldr := extensions.NewUUIDBuilder(memory.DefaultAllocator)
+	defer bldr.Release()
+
+	bldr.Append(testUUID)
+	bldr.AppendNull()
+	bldr.AppendBytes(testUUID)
+
+	arr := bldr.NewArray()
+	defer arr.Release()
+
+	assert.Equal(t, 3, arr.Len())
+	assert.Equal(t, 1, arr.NullN())
+
+	uuidArr, ok := arr.(*extensions.UUIDArray)
+	require.True(t, ok)
+
+	b, err := uuidArr.MarshalJSON()
+	require.NoError(t, err)
+
+	expectedJSON := fmt.Sprintf(`["%[1]s",null,"%[1]s"]`, testUUID)
+	require.Equal(t, expectedJSON, string(b))
+}
+
+// TestUUIDRecordToJSON checks array.RecordToJSON output, line by line, for a
+// record with a single UUID column.
+func TestUUIDRecordToJSON(t *testing.T) {
+	typ := extensions.NewUUIDType()
+
+	bldr := extensions.NewUUIDBuilder(memory.DefaultAllocator)
+	defer bldr.Release()
+
+	uuid1 := uuid.MustParse("8c607ed4-07b2-4b9c-b5eb-c0387357f9ae")
+
+	bldr.Append(uuid1)
+	bldr.AppendNull()
+
+	// c5f2cbd9-7094-491a-b267-167bb62efe02
+	bldr.AppendBytes([16]byte{197, 242, 203, 217, 112, 148, 73, 26, 178, 103, 22, 123, 182, 46, 254, 2})
+
+	arr := bldr.NewArray()
+	defer arr.Release()
+
+	assert.Equal(t, 3, arr.Len())
+	assert.Equal(t, 1, arr.NullN())
+
+	uuidArr, ok := arr.(*extensions.UUIDArray)
+	require.True(t, ok)
+
+	rec := array.NewRecord(arrow.NewSchema([]arrow.Field{{Name: "uuid", Type: typ, Nullable: true}}, nil), []arrow.Array{uuidArr}, 3)
+	defer rec.Release()
+
+	buf := bytes.NewBuffer([]byte("\n")) // expected output has leading newline for clearer formatting
+	require.NoError(t, array.RecordToJSON(rec, buf))
+
+	expectedJSON := `
+		{"uuid":"8c607ed4-07b2-4b9c-b5eb-c0387357f9ae"}
+		{"uuid":null}
+		{"uuid":"c5f2cbd9-7094-491a-b267-167bb62efe02"}
+	`
+
+	expectedJSONLines := strings.Split(expectedJSON, "\n")
+	actualJSONLines := strings.Split(buf.String(), "\n")
+
+	require.Equal(t, len(expectedJSONLines), len(actualJSONLines))
+	for i := range expectedJSONLines {
+		if strings.TrimSpace(expectedJSONLines[i]) != "" {
+			require.JSONEq(t, expectedJSONLines[i], actualJSONLines[i])
+		}
+	}
+}
diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go
index 1528bb05d9daa..b9535002a0a17 100644
--- a/go/arrow/internal/flight_integration/scenario.go
+++ b/go/arrow/internal/flight_integration/scenario.go
@@ -40,7 +40,6 @@ import (
 	"github.com/apache/arrow/go/v18/arrow/internal/arrjson"
 	"github.com/apache/arrow/go/v18/arrow/ipc"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 	"golang.org/x/xerrors"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
@@ -161,9 +160,6 @@ func (s *defaultIntegrationTester) RunClient(addr string, opts ...grpc.DialOptio
 
 	ctx := context.Background()
 
-	arrow.RegisterExtensionType(types.NewUUIDType())
-	defer arrow.UnregisterExtensionType("uuid")
-
 	descr := &flight.FlightDescriptor{
 		Type: flight.DescriptorPATH,
 		Path: []string{s.path},
diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go
index b3e1dcac14119..c47a091268be9 100644
--- a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go
+++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go
@@ -22,12 +22,10 @@ import (
 	"log"
 	"os"
 
-	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/array"
 	"github.com/apache/arrow/go/v18/arrow/arrio"
 	"github.com/apache/arrow/go/v18/arrow/internal/arrjson"
 	"github.com/apache/arrow/go/v18/arrow/ipc"
-	"github.com/apache/arrow/go/v18/internal/types"
 )
 
 func main() {
@@ -50,8 +48,6 @@ func main() {
 }
 
 func runCommand(jsonName, arrowName, mode string, verbose bool) error {
-	arrow.RegisterExtensionType(types.NewUUIDType())
-
 	if jsonName == "" {
 		return fmt.Errorf("must specify json file name")
 	}
diff --git a/go/arrow/ipc/metadata_test.go b/go/arrow/ipc/metadata_test.go
index 33bc63c2a0068..14b8da2cf7cf7 100644
--- a/go/arrow/ipc/metadata_test.go
+++ b/go/arrow/ipc/metadata_test.go
@@ -23,10 +23,10 @@ import (
 
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/internal/dictutils"
 	"github.com/apache/arrow/go/v18/arrow/internal/flatbuf"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 	flatbuffers "github.com/google/flatbuffers/go"
 	"github.com/stretchr/testify/assert"
 )
@@ -169,7 +169,7 @@ func TestRWFooter(t *testing.T) {
 }
 
 func exampleUUID(mem memory.Allocator) arrow.Array {
-	extType := types.NewUUIDType()
+	extType := extensions.NewUUIDType()
 	bldr := array.NewExtensionBuilder(mem, extType)
 	defer bldr.Release()
 
@@ -184,9 +184,6 @@ func TestUnrecognizedExtensionType(t *testing.T) {
 	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
 	defer pool.AssertSize(t, 0)
 
-	// register the uuid type
-	assert.NoError(t, arrow.RegisterExtensionType(types.NewUUIDType()))
-
 	extArr := exampleUUID(pool)
 	defer extArr.Release()
 
@@ -205,7 +202,9 @@ func TestUnrecognizedExtensionType(t *testing.T) {
 
 	// unregister the uuid type before we read back the buffer so it is
 	// unrecognized when reading back the record batch.
-	assert.NoError(t, arrow.UnregisterExtensionType("uuid"))
+	assert.NoError(t, arrow.UnregisterExtensionType("arrow.uuid"))
+	// re-register once the test is complete
+	defer arrow.RegisterExtensionType(extensions.NewUUIDType())
 	rdr, err := NewReader(&buf, WithAllocator(pool))
 	defer rdr.Release()
 
diff --git a/go/internal/types/extension_types.go b/go/internal/types/extension_types.go
index 85c64d86bffcb..33ada2d488f71 100644
--- a/go/internal/types/extension_types.go
+++ b/go/internal/types/extension_types.go
@@ -18,238 +18,15 @@
 package types
 
 import (
-	"bytes"
 	"encoding/binary"
 	"fmt"
 	"reflect"
-	"strings"
 
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/array"
-	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/json"
-	"github.com/google/uuid"
 	"golang.org/x/xerrors"
 )
 
-var UUID = NewUUIDType()
-
-type UUIDBuilder struct {
-	*array.ExtensionBuilder
-}
-
-func NewUUIDBuilder(mem memory.Allocator) *UUIDBuilder {
-	return &UUIDBuilder{ExtensionBuilder: array.NewExtensionBuilder(mem, NewUUIDType())}
-}
-
-func (b *UUIDBuilder) Append(v uuid.UUID) {
-	b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).Append(v[:])
-}
-
-func (b *UUIDBuilder) UnsafeAppend(v uuid.UUID) {
-	b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).UnsafeAppend(v[:])
-}
-
-func (b *UUIDBuilder) AppendValueFromString(s string) error {
-	if s == array.NullValueStr {
-		b.AppendNull()
-		return nil
-	}
-
-	uid, err := uuid.Parse(s)
-	if err != nil {
-		return err
-	}
-
-	b.Append(uid)
-	return nil
-}
-
-func (b *UUIDBuilder) AppendValues(v []uuid.UUID, valid []bool) {
-	if len(v) != len(valid) && len(valid) != 0 {
-		panic("len(v) != len(valid) && len(valid) != 0")
-	}
-
-	data := make([][]byte, len(v))
-	for i := range v {
-		if len(valid) > 0 && !valid[i] {
-			continue
-		}
-		data[i] = v[i][:]
-	}
-	b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).AppendValues(data, valid)
-}
-
-func (b *UUIDBuilder) UnmarshalOne(dec *json.Decoder) error {
-	t, err := dec.Token()
-	if err != nil {
-		return err
-	}
-
-	var val uuid.UUID
-	switch v := t.(type) {
-	case string:
-		val, err = uuid.Parse(v)
-		if err != nil {
-			return err
-		}
-	case []byte:
-		val, err = uuid.ParseBytes(v)
-		if err != nil {
-			return err
-		}
-	case nil:
-		b.AppendNull()
-		return nil
-	default:
-		return &json.UnmarshalTypeError{
-			Value:  fmt.Sprint(t),
-			Type:   reflect.TypeOf([]byte{}),
-			Offset: dec.InputOffset(),
-			Struct: fmt.Sprintf("FixedSizeBinary[%d]", 16),
-		}
-	}
-
-	b.Append(val)
-	return nil
-}
-
-func (b *UUIDBuilder) Unmarshal(dec *json.Decoder) error {
-	for dec.More() {
-		if err := b.UnmarshalOne(dec); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (b *UUIDBuilder) UnmarshalJSON(data []byte) error {
-	dec := json.NewDecoder(bytes.NewReader(data))
-	t, err := dec.Token()
-	if err != nil {
-		return err
-	}
-
-	if delim, ok := t.(json.Delim); !ok || delim != '[' {
-		return fmt.Errorf("uuid builder must unpack from json array, found %s", delim)
-	}
-
-	return b.Unmarshal(dec)
-}
-
-// UUIDArray is a simple array which is a FixedSizeBinary(16)
-type UUIDArray struct {
-	array.ExtensionArrayBase
-}
-
-func (a *UUIDArray) String() string {
-	arr := a.Storage().(*array.FixedSizeBinary)
-	o := new(strings.Builder)
-	o.WriteString("[")
-	for i := 0; i < arr.Len(); i++ {
-		if i > 0 {
-			o.WriteString(" ")
-		}
-		switch {
-		case a.IsNull(i):
-			o.WriteString(array.NullValueStr)
-		default:
-			fmt.Fprintf(o, "%q", a.Value(i))
-		}
-	}
-	o.WriteString("]")
-	return o.String()
-}
-
-func (a *UUIDArray) Value(i int) uuid.UUID {
-	if a.IsNull(i) {
-		return uuid.Nil
-	}
-	return uuid.Must(uuid.FromBytes(a.Storage().(*array.FixedSizeBinary).Value(i)))
-}
-
-func (a *UUIDArray) ValueStr(i int) string {
-	switch {
-	case a.IsNull(i):
-		return array.NullValueStr
-	default:
-		return a.Value(i).String()
-	}
-}
-
-func (a *UUIDArray) MarshalJSON() ([]byte, error) {
-	arr := a.Storage().(*array.FixedSizeBinary)
-	values := make([]interface{}, a.Len())
-	for i := 0; i < a.Len(); i++ {
-		if a.IsValid(i) {
-			values[i] = uuid.Must(uuid.FromBytes(arr.Value(i))).String()
-		}
-	}
-	return json.Marshal(values)
-}
-
-func (a *UUIDArray) GetOneForMarshal(i int) interface{} {
-	if a.IsNull(i) {
-		return nil
-	}
-	return a.Value(i)
-}
-
-// UUIDType is a simple extension type that represents a FixedSizeBinary(16)
-// to be used for representing UUIDs
-type UUIDType struct {
-	arrow.ExtensionBase
-}
-
-// NewUUIDType is a convenience function to create an instance of UUIDType
-// with the correct storage type
-func NewUUIDType() *UUIDType {
-	return &UUIDType{ExtensionBase: arrow.ExtensionBase{Storage: &arrow.FixedSizeBinaryType{ByteWidth: 16}}}
-}
-
-// ArrayType returns TypeOf(UUIDArray{}) for constructing UUID arrays
-func (*UUIDType) ArrayType() reflect.Type {
-	return reflect.TypeOf(UUIDArray{})
-}
-
-func (*UUIDType) ExtensionName() string {
-	return "uuid"
-}
-
-func (e *UUIDType) String() string {
-	return fmt.Sprintf("extension_type<storage=%s>", e.Storage)
-}
-
-func (e *UUIDType) MarshalJSON() ([]byte, error) {
-	return []byte(fmt.Sprintf(`{"name":"%s","metadata":%s}`, e.ExtensionName(), e.Serialize())), nil
-}
-
-// Serialize returns "uuid-serialized" for testing proper metadata passing
-func (*UUIDType) Serialize() string {
-	return "uuid-serialized"
-}
-
-// Deserialize expects storageType to be FixedSizeBinaryType{ByteWidth: 16} and the data to be
-// "uuid-serialized" in order to correctly create a UUIDType for testing deserialize.
-func (*UUIDType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
-	if data != "uuid-serialized" {
-		return nil, fmt.Errorf("type identifier did not match: '%s'", data)
-	}
-	if !arrow.TypeEqual(storageType, &arrow.FixedSizeBinaryType{ByteWidth: 16}) {
-		return nil, fmt.Errorf("invalid storage type for UUIDType: %s", storageType.Name())
-	}
-	return NewUUIDType(), nil
-}
-
-// ExtensionEquals returns true if both extensions have the same name
-func (e *UUIDType) ExtensionEquals(other arrow.ExtensionType) bool {
-	return e.ExtensionName() == other.ExtensionName()
-}
-
-func (*UUIDType) NewBuilder(mem memory.Allocator) array.Builder {
-	return NewUUIDBuilder(mem)
-}
-
 // Parametric1Array is a simple int32 array for use with the Parametric1Type
 // in testing a parameterized user-defined extension type.
 type Parametric1Array struct {
@@ -518,14 +295,14 @@ func (SmallintType) ArrayType() reflect.Type { return reflect.TypeOf(SmallintArr
 
 func (SmallintType) ExtensionName() string { return "smallint" }
 
-func (SmallintType) Serialize() string { return "smallint" }
+func (SmallintType) Serialize() string { return "smallint-serialized" }
 
 func (s *SmallintType) ExtensionEquals(other arrow.ExtensionType) bool {
 	return s.Name() == other.Name()
 }
 
 func (SmallintType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
-	if data != "smallint" {
+	if data != "smallint-serialized" {
 		return nil, fmt.Errorf("type identifier did not match: '%s'", data)
 	}
 	if !arrow.TypeEqual(storageType, arrow.PrimitiveTypes.Int16) {
diff --git a/go/internal/types/extension_types_test.go b/go/internal/types/extension_types_test.go
deleted file mode 100644
index 65f6353d01be1..0000000000000
--- a/go/internal/types/extension_types_test.go
+++ /dev/null
@@ -1,95 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package types_test
-
-import (
-	"bytes"
-	"testing"
-
-	"github.com/apache/arrow/go/v18/arrow"
-	"github.com/apache/arrow/go/v18/arrow/array"
-	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/json"
-	"github.com/apache/arrow/go/v18/internal/types"
-	"github.com/google/uuid"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-var testUUID = uuid.New()
-
-func TestUUIDExtensionBuilder(t *testing.T) {
-	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
-	defer mem.AssertSize(t, 0)
-	builder := types.NewUUIDBuilder(mem)
-	builder.Append(testUUID)
-	arr := builder.NewArray()
-	defer arr.Release()
-	arrStr := arr.String()
-	assert.Equal(t, "[\""+testUUID.String()+"\"]", arrStr)
-	jsonStr, err := json.Marshal(arr)
-	assert.NoError(t, err)
-
-	arr1, _, err := array.FromJSON(mem, types.NewUUIDType(), bytes.NewReader(jsonStr))
-	defer arr1.Release()
-	assert.NoError(t, err)
-	assert.Equal(t, arr, arr1)
-}
-
-func TestUUIDExtensionRecordBuilder(t *testing.T) {
-	schema := arrow.NewSchema([]arrow.Field{
-		{Name: "uuid", Type: types.NewUUIDType()},
-	}, nil)
-	builder := array.NewRecordBuilder(memory.DefaultAllocator, schema)
-	builder.Field(0).(*types.UUIDBuilder).Append(testUUID)
-	record := builder.NewRecord()
-	b, err := record.MarshalJSON()
-	require.NoError(t, err)
-	require.Equal(t, "[{\"uuid\":\""+testUUID.String()+"\"}\n]", string(b))
-	record1, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, bytes.NewReader(b))
-	require.NoError(t, err)
-	require.Equal(t, record, record1)
-}
-
-func TestUUIDStringRoundTrip(t *testing.T) {
-	// 1. create array
-	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
-	defer mem.AssertSize(t, 0)
-
-	b := types.NewUUIDBuilder(mem)
-	b.Append(uuid.Nil)
-	b.AppendNull()
-	b.Append(uuid.NameSpaceURL)
-	b.AppendNull()
-	b.Append(testUUID)
-
-	arr := b.NewArray()
-	defer arr.Release()
-
-	// 2. create array via AppendValueFromString
-	b1 := types.NewUUIDBuilder(mem)
-	defer b1.Release()
-
-	for i := 0; i < arr.Len(); i++ {
-		assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i)))
-	}
-
-	arr1 := b1.NewArray()
-	defer arr1.Release()
-
-	assert.True(t, array.Equal(arr, arr1))
-}
diff --git a/go/parquet/cmd/parquet_reader/main.go b/go/parquet/cmd/parquet_reader/main.go
index 6e04f4254f9fa..4e480aeb8660b 100644
--- a/go/parquet/cmd/parquet_reader/main.go
+++ b/go/parquet/cmd/parquet_reader/main.go
@@ -154,7 +154,7 @@ func main() {
 			if descr.ConvertedType() != schema.ConvertedTypes.None {
 				fmt.Printf("/%s", descr.ConvertedType())
 				if descr.ConvertedType() == schema.ConvertedTypes.Decimal {
-					dec := descr.LogicalType().(*schema.DecimalLogicalType)
+					dec := descr.LogicalType().(schema.DecimalLogicalType)
 					fmt.Printf("(%d,%d)", dec.Precision(), dec.Scale())
 				}
 			}
diff --git a/go/parquet/metadata/app_version.go b/go/parquet/metadata/app_version.go
index 887ed79343a42..345e9d440a1ca 100644
--- a/go/parquet/metadata/app_version.go
+++ b/go/parquet/metadata/app_version.go
@@ -164,7 +164,7 @@ func (v AppVersion) HasCorrectStatistics(coltype parquet.Type, logicalType schem
 	// parquet-cpp-arrow version 4.0.0 fixed Decimal comparisons for creating min/max stats
 	// parquet-cpp also becomes parquet-cpp-arrow as of version 4.0.0
 	if v.App == "parquet-cpp" || (v.App == "parquet-cpp-arrow" && v.LessThan(parquet1655FixedVersion)) {
-		if _, ok := logicalType.(*schema.DecimalLogicalType); ok && coltype == parquet.Types.FixedLenByteArray {
+		if _, ok := logicalType.(schema.DecimalLogicalType); ok && coltype == parquet.Types.FixedLenByteArray {
 			return false
 		}
 	}
diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go
index 16282173a685c..a238a78133e55 100644
--- a/go/parquet/pqarrow/encode_arrow_test.go
+++ b/go/parquet/pqarrow/encode_arrow_test.go
@@ -30,6 +30,7 @@ import (
 	"github.com/apache/arrow/go/v18/arrow/bitutil"
 	"github.com/apache/arrow/go/v18/arrow/decimal128"
 	"github.com/apache/arrow/go/v18/arrow/decimal256"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/ipc"
 	"github.com/apache/arrow/go/v18/arrow/memory"
 	"github.com/apache/arrow/go/v18/internal/types"
@@ -715,16 +716,6 @@ type ParquetIOTestSuite struct {
 	suite.Suite
 }
 
-func (ps *ParquetIOTestSuite) SetupTest() {
-	ps.NoError(arrow.RegisterExtensionType(types.NewUUIDType()))
-}
-
-func (ps *ParquetIOTestSuite) TearDownTest() {
-	if arrow.GetExtensionType("uuid") != nil {
-		ps.NoError(arrow.UnregisterExtensionType("uuid"))
-	}
-}
-
 func (ps *ParquetIOTestSuite) makeSimpleSchema(typ arrow.DataType, rep parquet.Repetition) *schema.GroupNode {
 	byteWidth := int32(-1)
 
@@ -2053,7 +2044,7 @@ func (ps *ParquetIOTestSuite) TestArrowExtensionTypeRoundTrip() {
 	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
 	defer mem.AssertSize(ps.T(), 0)
 
-	builder := types.NewUUIDBuilder(mem)
+	builder := extensions.NewUUIDBuilder(mem)
 	builder.Append(uuid.New())
 	arr := builder.NewArray()
 	defer arr.Release()
@@ -2076,22 +2067,23 @@ func (ps *ParquetIOTestSuite) TestArrowUnknownExtensionTypeRoundTrip() {
 
 	{
 		// Prepare `written` table with the extension type registered.
-		extType := types.NewUUIDType()
+		extType := types.NewSmallintType()
 		bldr := array.NewExtensionBuilder(mem, extType)
 		defer bldr.Release()
 
-		bldr.Builder.(*array.FixedSizeBinaryBuilder).AppendValues(
-			[][]byte{nil, []byte("abcdefghijklmno0"), []byte("abcdefghijklmno1"), []byte("abcdefghijklmno2")},
+		bldr.Builder.(*array.Int16Builder).AppendValues(
+			[]int16{0, 0, 1, 2},
 			[]bool{false, true, true, true})
 
 		arr := bldr.NewArray()
 		defer arr.Release()
 
-		if arrow.GetExtensionType("uuid") != nil {
-			ps.NoError(arrow.UnregisterExtensionType("uuid"))
+		if arrow.GetExtensionType("smallint") != nil {
+			ps.NoError(arrow.UnregisterExtensionType("smallint"))
+			defer arrow.RegisterExtensionType(extType)
 		}
 
-		fld := arrow.Field{Name: "uuid", Type: arr.DataType(), Nullable: true}
+		fld := arrow.Field{Name: "smallint", Type: arr.DataType(), Nullable: true}
 		cnk := arrow.NewChunked(arr.DataType(), []arrow.Array{arr})
 		defer arr.Release() // NewChunked
 		written = array.NewTable(arrow.NewSchema([]arrow.Field{fld}, nil), []arrow.Column{*arrow.NewColumn(fld, cnk)}, -1)
@@ -2101,16 +2093,16 @@ func (ps *ParquetIOTestSuite) TestArrowUnknownExtensionTypeRoundTrip() {
 
 	{
 		// Prepare `expected` table with the extension type unregistered in the underlying type.
-		bldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 16})
+		bldr := array.NewInt16Builder(mem)
 		defer bldr.Release()
 		bldr.AppendValues(
-			[][]byte{nil, []byte("abcdefghijklmno0"), []byte("abcdefghijklmno1"), []byte("abcdefghijklmno2")},
+			[]int16{0, 0, 1, 2},
 			[]bool{false, true, true, true})
 
 		arr := bldr.NewArray()
 		defer arr.Release()
 
-		fld := arrow.Field{Name: "uuid", Type: arr.DataType(), Nullable: true}
+		fld := arrow.Field{Name: "smallint", Type: arr.DataType(), Nullable: true}
 		cnk := arrow.NewChunked(arr.DataType(), []arrow.Array{arr})
 		defer arr.Release() // NewChunked
 		expected = array.NewTable(arrow.NewSchema([]arrow.Field{fld}, nil), []arrow.Column{*arrow.NewColumn(fld, cnk)}, -1)
@@ -2147,13 +2139,55 @@ func (ps *ParquetIOTestSuite) TestArrowUnknownExtensionTypeRoundTrip() {
 	ps.Truef(array.Equal(exc, tbc), "expected: %T %s\ngot: %T %s", exc, exc, tbc, tbc)
 
 	expectedMd := arrow.MetadataFrom(map[string]string{
-		ipc.ExtensionTypeKeyName:     "uuid",
-		ipc.ExtensionMetadataKeyName: "uuid-serialized",
+		ipc.ExtensionTypeKeyName:     "smallint",
+		ipc.ExtensionMetadataKeyName: "smallint-serialized",
 		"PARQUET:field_id":           "-1",
 	})
 	ps.Truef(expectedMd.Equal(tbl.Column(0).Field().Metadata), "expected: %v\ngot: %v", expectedMd, tbl.Column(0).Field().Metadata)
 }
 
+// TestArrowExtensionTypeLogicalType writes one UUID and one JSON extension value and
+// checks that Parquet columns 0 and 1 report the UUID and JSON logical types.
+func (ps *ParquetIOTestSuite) TestArrowExtensionTypeLogicalType() {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(ps.T(), 0)
+
+	jsonType, err := extensions.NewJSONType(arrow.BinaryTypes.String)
+	ps.Require().NoError(err)
+
+	sch := arrow.NewSchema([]arrow.Field{
+		{Name: "uuid", Type: extensions.NewUUIDType()},
+		{Name: "json", Type: jsonType},
+	}, nil)
+	bldr := array.NewRecordBuilder(mem, sch)
+	defer bldr.Release()
+
+	bldr.Field(0).(*extensions.UUIDBuilder).Append(uuid.New())
+	ps.Require().NoError(bldr.Field(1).(*array.ExtensionBuilder).AppendValueFromString(`{"hello": ["world", 2, true], "world": null}`))
+	rec := bldr.NewRecord()
+	defer rec.Release()
+
+	var buf bytes.Buffer
+	wr, err := pqarrow.NewFileWriter(
+		sch,
+		&buf,
+		parquet.NewWriterProperties(),
+		pqarrow.DefaultWriterProps(),
+	)
+	ps.Require().NoError(err)
+
+	ps.Require().NoError(wr.Write(rec))
+	ps.Require().NoError(wr.Close())
+
+	rdr, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
+	ps.Require().NoError(err)
+	defer rdr.Close()
+
+	pqSchema := rdr.MetaData().Schema
+	ps.True(pqSchema.Column(0).LogicalType().Equals(schema.UUIDLogicalType{}))
+	ps.True(pqSchema.Column(1).LogicalType().Equals(schema.JSONLogicalType{}))
+}
+
 func TestWriteTableMemoryAllocation(t *testing.T) {
 	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
 	sc := arrow.NewSchema([]arrow.Field{
@@ -2163,7 +2197,7 @@ func TestWriteTableMemoryAllocation(t *testing.T) {
 			arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
 			arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true})},
 		{Name: "arr_i64", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64)},
-		{Name: "uuid", Type: types.NewUUIDType(), Nullable: true},
+		{Name: "uuid", Type: extensions.NewUUIDType(), Nullable: true},
 	}, nil)
 
 	bld := array.NewRecordBuilder(mem, sc)
@@ -2176,7 +2210,7 @@ func TestWriteTableMemoryAllocation(t *testing.T) {
 	abld := bld.Field(3).(*array.ListBuilder)
 	abld.Append(true)
 	abld.ValueBuilder().(*array.Int64Builder).Append(2)
-	bld.Field(4).(*types.UUIDBuilder).Append(uuid.MustParse("00000000-0000-0000-0000-000000000001"))
+	bld.Field(4).(*extensions.UUIDBuilder).Append(uuid.MustParse("00000000-0000-0000-0000-000000000001"))
 
 	rec := bld.NewRecord()
 	bld.Release()
diff --git a/go/parquet/pqarrow/path_builder_test.go b/go/parquet/pqarrow/path_builder_test.go
index 9bbae426b8a46..364f836d0bbca 100644
--- a/go/parquet/pqarrow/path_builder_test.go
+++ b/go/parquet/pqarrow/path_builder_test.go
@@ -22,8 +22,8 @@ import (
 
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/array"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 	"github.com/google/uuid"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -364,12 +364,12 @@ func TestNestedExtensionListsWithSomeNulls(t *testing.T) {
 	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
 	defer mem.AssertSize(t, 0)
 
-	listType := arrow.ListOf(types.NewUUIDType())
+	listType := arrow.ListOf(extensions.NewUUIDType())
 	bldr := array.NewListBuilder(mem, listType)
 	defer bldr.Release()
 
 	nestedBldr := bldr.ValueBuilder().(*array.ListBuilder)
-	vb := nestedBldr.ValueBuilder().(*types.UUIDBuilder)
+	vb := nestedBldr.ValueBuilder().(*extensions.UUIDBuilder)
 
 	uuid1 := uuid.New()
 	uuid3 := uuid.New()
diff --git a/go/parquet/pqarrow/schema.go b/go/parquet/pqarrow/schema.go
index ce5cc6f905084..4882077671f0f 100644
--- a/go/parquet/pqarrow/schema.go
+++ b/go/parquet/pqarrow/schema.go
@@ -25,7 +25,6 @@ import (
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/decimal128"
 	"github.com/apache/arrow/go/v18/arrow/flight"
-	"github.com/apache/arrow/go/v18/arrow/ipc"
 	"github.com/apache/arrow/go/v18/arrow/memory"
 	"github.com/apache/arrow/go/v18/parquet"
 	"github.com/apache/arrow/go/v18/parquet/file"
@@ -120,6 +119,15 @@ func (sm *SchemaManifest) GetFieldIndices(indices []int) ([]int, error) {
 	return ret, nil
 }
 
+// ExtensionCustomParquetType is an optional interface that an Arrow ExtensionType may
+// implement to choose the target LogicalType used when converting to Parquet.
+//
+// Only the LogicalType can be customized. The Parquet primitive type is always derived
+// from the extension's StorageType by a fixed mapping (see getParquetType in pqarrow).
+type ExtensionCustomParquetType interface {
+	ParquetLogicalType() schema.LogicalType
+}
+
 func isDictionaryReadSupported(dt arrow.DataType) bool {
 	return arrow.IsBinaryLike(dt.ID())
 }
@@ -250,104 +258,14 @@ func structToNode(typ *arrow.StructType, name string, nullable bool, props *parq
 }
 
 func fieldToNode(name string, field arrow.Field, props *parquet.WriterProperties, arrprops ArrowWriterProperties) (schema.Node, error) {
-	var (
-		logicalType schema.LogicalType = schema.NoLogicalType{}
-		typ         parquet.Type
-		repType     = repFromNullable(field.Nullable)
-		length      = -1
-		precision   = -1
-		scale       = -1
-		err         error
-	)
+	repType := repFromNullable(field.Nullable)
 
+	// Handle complex types i.e. GroupNodes
 	switch field.Type.ID() {
 	case arrow.NULL:
-		typ = parquet.Types.Int32
-		logicalType = &schema.NullLogicalType{}
 		if repType != parquet.Repetitions.Optional {
 			return nil, xerrors.New("nulltype arrow field must be nullable")
 		}
-	case arrow.BOOL:
-		typ = parquet.Types.Boolean
-	case arrow.UINT8:
-		typ = parquet.Types.Int32
-		logicalType = schema.NewIntLogicalType(8, false)
-	case arrow.INT8:
-		typ = parquet.Types.Int32
-		logicalType = schema.NewIntLogicalType(8, true)
-	case arrow.UINT16:
-		typ = parquet.Types.Int32
-		logicalType = schema.NewIntLogicalType(16, false)
-	case arrow.INT16:
-		typ = parquet.Types.Int32
-		logicalType = schema.NewIntLogicalType(16, true)
-	case arrow.UINT32:
-		typ = parquet.Types.Int32
-		logicalType = schema.NewIntLogicalType(32, false)
-	case arrow.INT32:
-		typ = parquet.Types.Int32
-		logicalType = schema.NewIntLogicalType(32, true)
-	case arrow.UINT64:
-		typ = parquet.Types.Int64
-		logicalType = schema.NewIntLogicalType(64, false)
-	case arrow.INT64:
-		typ = parquet.Types.Int64
-		logicalType = schema.NewIntLogicalType(64, true)
-	case arrow.FLOAT32:
-		typ = parquet.Types.Float
-	case arrow.FLOAT64:
-		typ = parquet.Types.Double
-	case arrow.STRING, arrow.LARGE_STRING:
-		logicalType = schema.StringLogicalType{}
-		fallthrough
-	case arrow.BINARY, arrow.LARGE_BINARY:
-		typ = parquet.Types.ByteArray
-	case arrow.FIXED_SIZE_BINARY:
-		typ = parquet.Types.FixedLenByteArray
-		length = field.Type.(*arrow.FixedSizeBinaryType).ByteWidth
-	case arrow.DECIMAL, arrow.DECIMAL256:
-		dectype := field.Type.(arrow.DecimalType)
-		precision = int(dectype.GetPrecision())
-		scale = int(dectype.GetScale())
-
-		if props.StoreDecimalAsInteger() && 1 <= precision && precision <= 18 {
-			if precision <= 9 {
-				typ = parquet.Types.Int32
-			} else {
-				typ = parquet.Types.Int64
-			}
-		} else {
-			typ = parquet.Types.FixedLenByteArray
-			length = int(DecimalSize(int32(precision)))
-		}
-
-		logicalType = schema.NewDecimalLogicalType(int32(precision), int32(scale))
-	case arrow.DATE32:
-		typ = parquet.Types.Int32
-		logicalType = schema.DateLogicalType{}
-	case arrow.DATE64:
-		typ = parquet.Types.Int32
-		logicalType = schema.DateLogicalType{}
-	case arrow.TIMESTAMP:
-		typ, logicalType, err = getTimestampMeta(field.Type.(*arrow.TimestampType), props, arrprops)
-		if err != nil {
-			return nil, err
-		}
-	case arrow.TIME32:
-		typ = parquet.Types.Int32
-		logicalType = schema.NewTimeLogicalType(true, schema.TimeUnitMillis)
-	case arrow.TIME64:
-		typ = parquet.Types.Int64
-		timeType := field.Type.(*arrow.Time64Type)
-		if timeType.Unit == arrow.Nanosecond {
-			logicalType = schema.NewTimeLogicalType(true, schema.TimeUnitNanos)
-		} else {
-			logicalType = schema.NewTimeLogicalType(true, schema.TimeUnitMicros)
-		}
-	case arrow.FLOAT16:
-		typ = parquet.Types.FixedLenByteArray
-		length = arrow.Float16SizeBytes
-		logicalType = schema.Float16LogicalType{}
 	case arrow.STRUCT:
 		return structToNode(field.Type.(*arrow.StructType), field.Name, field.Nullable, props, arrprops)
 	case arrow.FIXED_SIZE_LIST, arrow.LIST:
@@ -369,16 +287,6 @@ func fieldToNode(name string, field arrow.Field, props *parquet.WriterProperties
 		dictType := field.Type.(*arrow.DictionaryType)
 		return fieldToNode(name, arrow.Field{Name: name, Type: dictType.ValueType, Nullable: field.Nullable, Metadata: field.Metadata},
 			props, arrprops)
-	case arrow.EXTENSION:
-		return fieldToNode(name, arrow.Field{
-			Name:     name,
-			Type:     field.Type.(arrow.ExtensionType).StorageType(),
-			Nullable: field.Nullable,
-			Metadata: arrow.MetadataFrom(map[string]string{
-				ipc.ExtensionTypeKeyName:     field.Type.(arrow.ExtensionType).ExtensionName(),
-				ipc.ExtensionMetadataKeyName: field.Type.(arrow.ExtensionType).Serialize(),
-			}),
-		}, props, arrprops)
 	case arrow.MAP:
 		mapType := field.Type.(*arrow.MapType)
 		keyNode, err := fieldToNode("key", mapType.KeyField(), props, arrprops)
@@ -402,8 +310,12 @@ func fieldToNode(name string, field arrow.Field, props *parquet.WriterProperties
 			}, -1)
 		}
 		return schema.MapOf(field.Name, keyNode, valueNode, repFromNullable(field.Nullable), -1)
-	default:
-		return nil, fmt.Errorf("%w: support for %s", arrow.ErrNotImplemented, field.Type.ID())
+	}
+
+	// Not a GroupNode
+	typ, logicalType, length, err := getParquetType(field.Type, props, arrprops)
+	if err != nil {
+		return nil, err
 	}
 
 	return schema.NewPrimitiveNodeLogical(name, repType, logicalType, typ, length, fieldIDFromMeta(field.Metadata))
@@ -472,7 +384,7 @@ func (s schemaTree) RecordLeaf(leaf *SchemaField) {
 	s.manifest.ColIndexToField[leaf.ColIndex] = leaf
 }
 
-func arrowInt(log *schema.IntLogicalType) (arrow.DataType, error) {
+func arrowInt(log schema.IntLogicalType) (arrow.DataType, error) {
 	switch log.BitWidth() {
 	case 8:
 		if log.IsSigned() {
@@ -499,7 +411,7 @@ func arrowInt(log *schema.IntLogicalType) (arrow.DataType, error) {
 	}
 }
 
-func arrowTime32(logical *schema.TimeLogicalType) (arrow.DataType, error) {
+func arrowTime32(logical schema.TimeLogicalType) (arrow.DataType, error) {
 	if logical.TimeUnit() == schema.TimeUnitMillis {
 		return arrow.FixedWidthTypes.Time32ms, nil
 	}
@@ -507,7 +419,7 @@ func arrowTime32(logical *schema.TimeLogicalType) (arrow.DataType, error) {
 	return nil, xerrors.New(logical.String() + " cannot annotate a time32")
 }
 
-func arrowTime64(logical *schema.TimeLogicalType) (arrow.DataType, error) {
+func arrowTime64(logical schema.TimeLogicalType) (arrow.DataType, error) {
 	switch logical.TimeUnit() {
 	case schema.TimeUnitMicros:
 		return arrow.FixedWidthTypes.Time64us, nil
@@ -518,7 +430,7 @@ func arrowTime64(logical *schema.TimeLogicalType) (arrow.DataType, error) {
 	}
 }
 
-func arrowTimestamp(logical *schema.TimestampLogicalType) (arrow.DataType, error) {
+func arrowTimestamp(logical schema.TimestampLogicalType) (arrow.DataType, error) {
 	tz := ""
 
 	// ConvertedTypes are adjusted to UTC per backward compatibility guidelines
@@ -539,7 +451,7 @@ func arrowTimestamp(logical *schema.TimestampLogicalType) (arrow.DataType, error
 	}
 }
 
-func arrowDecimal(logical *schema.DecimalLogicalType) arrow.DataType {
+func arrowDecimal(logical schema.DecimalLogicalType) arrow.DataType {
 	if logical.Precision() <= decimal128.MaxPrecision {
 		return &arrow.Decimal128Type{Precision: logical.Precision(), Scale: logical.Scale()}
 	}
@@ -550,11 +462,11 @@ func arrowFromInt32(logical schema.LogicalType) (arrow.DataType, error) {
 	switch logtype := logical.(type) {
 	case schema.NoLogicalType:
 		return arrow.PrimitiveTypes.Int32, nil
-	case *schema.TimeLogicalType:
+	case schema.TimeLogicalType:
 		return arrowTime32(logtype)
-	case *schema.DecimalLogicalType:
+	case schema.DecimalLogicalType:
 		return arrowDecimal(logtype), nil
-	case *schema.IntLogicalType:
+	case schema.IntLogicalType:
 		return arrowInt(logtype)
 	case schema.DateLogicalType:
 		return arrow.FixedWidthTypes.Date32, nil
@@ -569,13 +481,13 @@ func arrowFromInt64(logical schema.LogicalType) (arrow.DataType, error) {
 	}
 
 	switch logtype := logical.(type) {
-	case *schema.IntLogicalType:
+	case schema.IntLogicalType:
 		return arrowInt(logtype)
-	case *schema.DecimalLogicalType:
+	case schema.DecimalLogicalType:
 		return arrowDecimal(logtype), nil
-	case *schema.TimeLogicalType:
+	case schema.TimeLogicalType:
 		return arrowTime64(logtype)
-	case *schema.TimestampLogicalType:
+	case schema.TimestampLogicalType:
 		return arrowTimestamp(logtype)
 	default:
 		return nil, xerrors.New(logical.String() + " cannot annotate int64")
@@ -586,7 +498,7 @@ func arrowFromByteArray(logical schema.LogicalType) (arrow.DataType, error) {
 	switch logtype := logical.(type) {
 	case schema.StringLogicalType:
 		return arrow.BinaryTypes.String, nil
-	case *schema.DecimalLogicalType:
+	case schema.DecimalLogicalType:
 		return arrowDecimal(logtype), nil
 	case schema.NoLogicalType,
 		schema.EnumLogicalType,
@@ -600,7 +512,7 @@ func arrowFromByteArray(logical schema.LogicalType) (arrow.DataType, error) {
 
 func arrowFromFLBA(logical schema.LogicalType, length int) (arrow.DataType, error) {
 	switch logtype := logical.(type) {
-	case *schema.DecimalLogicalType:
+	case schema.DecimalLogicalType:
 		return arrowDecimal(logtype), nil
 	case schema.NoLogicalType, schema.IntervalLogicalType, schema.UUIDLogicalType:
 		return &arrow.FixedSizeBinaryType{ByteWidth: int(length)}, nil
@@ -611,6 +523,84 @@ func arrowFromFLBA(logical schema.LogicalType, length int) (arrow.DataType, erro
 	}
 }
 
+// getParquetType maps an Arrow data type that is stored as a Parquet primitive
+// column to its physical type, logical type annotation and type length. On
+// success the length is -1 unless the physical type is FIXED_LEN_BYTE_ARRAY.
+// Extension types resolve through their storage type and may override only the
+// logical type (ExtensionCustomParquetType). Other types: arrow.ErrNotImplemented.
+func getParquetType(typ arrow.DataType, props *parquet.WriterProperties, arrprops ArrowWriterProperties) (parquet.Type, schema.LogicalType, int, error) {
+	switch typ.ID() {
+	case arrow.NULL:
+		return parquet.Types.Int32, schema.NullLogicalType{}, -1, nil
+	case arrow.BOOL:
+		return parquet.Types.Boolean, schema.NoLogicalType{}, -1, nil
+	case arrow.UINT8:
+		return parquet.Types.Int32, schema.NewIntLogicalType(8, false), -1, nil
+	case arrow.INT8:
+		return parquet.Types.Int32, schema.NewIntLogicalType(8, true), -1, nil
+	case arrow.UINT16:
+		return parquet.Types.Int32, schema.NewIntLogicalType(16, false), -1, nil
+	case arrow.INT16:
+		return parquet.Types.Int32, schema.NewIntLogicalType(16, true), -1, nil
+	case arrow.UINT32:
+		return parquet.Types.Int32, schema.NewIntLogicalType(32, false), -1, nil
+	case arrow.INT32:
+		return parquet.Types.Int32, schema.NewIntLogicalType(32, true), -1, nil
+	case arrow.UINT64:
+		return parquet.Types.Int64, schema.NewIntLogicalType(64, false), -1, nil
+	case arrow.INT64:
+		return parquet.Types.Int64, schema.NewIntLogicalType(64, true), -1, nil
+	case arrow.FLOAT32:
+		return parquet.Types.Float, schema.NoLogicalType{}, -1, nil
+	case arrow.FLOAT64:
+		return parquet.Types.Double, schema.NoLogicalType{}, -1, nil
+	case arrow.STRING, arrow.LARGE_STRING:
+		return parquet.Types.ByteArray, schema.StringLogicalType{}, -1, nil
+	case arrow.BINARY, arrow.LARGE_BINARY:
+		return parquet.Types.ByteArray, schema.NoLogicalType{}, -1, nil
+	case arrow.FIXED_SIZE_BINARY:
+		return parquet.Types.FixedLenByteArray, schema.NoLogicalType{}, typ.(*arrow.FixedSizeBinaryType).ByteWidth, nil
+	case arrow.DECIMAL, arrow.DECIMAL256:
+		dectype := typ.(arrow.DecimalType)
+		precision := int32(dectype.GetPrecision())
+		// Keep the DECIMAL annotation for every physical type: without it the
+		// integer-backed columns would be written as plain INT32/INT64 values.
+		decimalType := schema.NewDecimalLogicalType(precision, int32(dectype.GetScale()))
+
+		if !props.StoreDecimalAsInteger() || precision > 18 {
+			return parquet.Types.FixedLenByteArray, decimalType, int(DecimalSize(precision)), nil
+		}
+		if precision > 9 {
+			return parquet.Types.Int64, decimalType, -1, nil
+		}
+		return parquet.Types.Int32, decimalType, -1, nil
+	case arrow.DATE32, arrow.DATE64:
+		return parquet.Types.Int32, schema.DateLogicalType{}, -1, nil
+	case arrow.TIMESTAMP:
+		pqType, logicalType, err := getTimestampMeta(typ.(*arrow.TimestampType), props, arrprops)
+		return pqType, logicalType, -1, err
+	case arrow.TIME32:
+		return parquet.Types.Int32, schema.NewTimeLogicalType(true, schema.TimeUnitMillis), -1, nil
+	case arrow.TIME64:
+		pqTimeUnit := schema.TimeUnitMicros
+		if typ.(*arrow.Time64Type).Unit == arrow.Nanosecond {
+			pqTimeUnit = schema.TimeUnitNanos
+		}
+		return parquet.Types.Int64, schema.NewTimeLogicalType(true, pqTimeUnit), -1, nil
+	case arrow.FLOAT16:
+		return parquet.Types.FixedLenByteArray, schema.Float16LogicalType{}, arrow.Float16SizeBytes, nil
+	case arrow.EXTENSION:
+		storageType := typ.(arrow.ExtensionType).StorageType()
+		pqType, logicalType, length, err := getParquetType(storageType, props, arrprops)
+		if withCustomType, ok := typ.(ExtensionCustomParquetType); ok {
+			logicalType = withCustomType.ParquetLogicalType()
+		}
+		return pqType, logicalType, length, err
+	default:
+		return parquet.Type(0), nil, 0, fmt.Errorf("%w: support for %s", arrow.ErrNotImplemented, typ.ID())
+	}
+}
+
 func getArrowType(physical parquet.Type, logical schema.LogicalType, typeLen int) (arrow.DataType, error) {
 	if !logical.IsValid() || logical.Equals(schema.NullLogicalType{}) {
 		return arrow.Null, nil
diff --git a/go/parquet/pqarrow/schema_test.go b/go/parquet/pqarrow/schema_test.go
index 24b031c174bf2..528200fd0e7d9 100644
--- a/go/parquet/pqarrow/schema_test.go
+++ b/go/parquet/pqarrow/schema_test.go
@@ -21,10 +21,10 @@ import (
 	"testing"
 
 	"github.com/apache/arrow/go/v18/arrow"
+	"github.com/apache/arrow/go/v18/arrow/extensions"
 	"github.com/apache/arrow/go/v18/arrow/flight"
 	"github.com/apache/arrow/go/v18/arrow/ipc"
 	"github.com/apache/arrow/go/v18/arrow/memory"
-	"github.com/apache/arrow/go/v18/internal/types"
 	"github.com/apache/arrow/go/v18/parquet"
 	"github.com/apache/arrow/go/v18/parquet/metadata"
 	"github.com/apache/arrow/go/v18/parquet/pqarrow"
@@ -34,7 +34,7 @@ import (
 )
 
 func TestGetOriginSchemaBase64(t *testing.T) {
-	uuidType := types.NewUUIDType()
+	uuidType := extensions.NewUUIDType()
 	md := arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})
 	extMd := arrow.NewMetadata([]string{ipc.ExtensionMetadataKeyName, ipc.ExtensionTypeKeyName, "PARQUET:field_id"}, []string{uuidType.Serialize(), uuidType.ExtensionName(), "-1"})
 	origArrSc := arrow.NewSchema([]arrow.Field{
@@ -44,10 +44,6 @@ func TestGetOriginSchemaBase64(t *testing.T) {
 	}, nil)
 
 	arrSerializedSc := flight.SerializeSchema(origArrSc, memory.DefaultAllocator)
-	if err := arrow.RegisterExtensionType(uuidType); err != nil {
-		t.Fatal(err)
-	}
-	defer arrow.UnregisterExtensionType(uuidType.ExtensionName())
 	pqschema, err := pqarrow.ToParquet(origArrSc, nil, pqarrow.DefaultWriterProps())
 	require.NoError(t, err)
 
@@ -71,11 +67,7 @@ func TestGetOriginSchemaBase64(t *testing.T) {
 }
 
 func TestGetOriginSchemaUnregisteredExtension(t *testing.T) {
-	uuidType := types.NewUUIDType()
-	if err := arrow.RegisterExtensionType(uuidType); err != nil {
-		t.Fatal(err)
-	}
-
+	uuidType := extensions.NewUUIDType()
 	md := arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})
 	origArrSc := arrow.NewSchema([]arrow.Field{
 		{Name: "f1", Type: arrow.BinaryTypes.String, Metadata: md},
@@ -90,6 +82,7 @@ func TestGetOriginSchemaUnregisteredExtension(t *testing.T) {
 	kv.Append("ARROW:schema", base64.StdEncoding.EncodeToString(arrSerializedSc))
 
 	arrow.UnregisterExtensionType(uuidType.ExtensionName())
+	defer arrow.RegisterExtensionType(uuidType)
 	arrsc, err := pqarrow.FromParquet(pqschema, nil, kv)
 	require.NoError(t, err)
 
diff --git a/go/parquet/schema/converted_types.go b/go/parquet/schema/converted_types.go
index 5fc10f61cebc1..b2b6f50cbf682 100644
--- a/go/parquet/schema/converted_types.go
+++ b/go/parquet/schema/converted_types.go
@@ -113,13 +113,9 @@ func (p ConvertedType) ToLogicalType(convertedDecimal DecimalMetadata) LogicalTy
 	case ConvertedTypes.TimeMicros:
 		return NewTimeLogicalType(true /* adjustedToUTC */, TimeUnitMicros)
 	case ConvertedTypes.TimestampMillis:
-		t := NewTimestampLogicalType(true /* adjustedToUTC */, TimeUnitMillis)
-		t.(*TimestampLogicalType).fromConverted = true
-		return t
+		return NewTimestampLogicalTypeWithOpts(WithTSIsAdjustedToUTC(), WithTSTimeUnitType(TimeUnitMillis), WithTSFromConverted())
 	case ConvertedTypes.TimestampMicros:
-		t := NewTimestampLogicalType(true /* adjustedToUTC */, TimeUnitMicros)
-		t.(*TimestampLogicalType).fromConverted = true
-		return t
+		return NewTimestampLogicalTypeWithOpts(WithTSIsAdjustedToUTC(), WithTSTimeUnitType(TimeUnitMicros), WithTSFromConverted())
 	case ConvertedTypes.Interval:
 		return IntervalLogicalType{}
 	case ConvertedTypes.Int8:
diff --git a/go/parquet/schema/logical_types.go b/go/parquet/schema/logical_types.go
index e8adce1ca140e..fa46ea0172f76 100644
--- a/go/parquet/schema/logical_types.go
+++ b/go/parquet/schema/logical_types.go
@@ -45,21 +45,21 @@ func getLogicalType(l *format.LogicalType) LogicalType {
 	case l.IsSetENUM():
 		return EnumLogicalType{}
 	case l.IsSetDECIMAL():
-		return &DecimalLogicalType{typ: l.DECIMAL}
+		return DecimalLogicalType{typ: l.DECIMAL}
 	case l.IsSetDATE():
 		return DateLogicalType{}
 	case l.IsSetTIME():
 		if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown {
 			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type")
 		}
-		return &TimeLogicalType{typ: l.TIME}
+		return TimeLogicalType{typ: l.TIME}
 	case l.IsSetTIMESTAMP():
 		if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown {
 			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type")
 		}
-		return &TimestampLogicalType{typ: l.TIMESTAMP}
+		return TimestampLogicalType{typ: l.TIMESTAMP}
 	case l.IsSetINTEGER():
-		return &IntLogicalType{typ: l.INTEGER}
+		return IntLogicalType{typ: l.INTEGER}
 	case l.IsSetUNKNOWN():
 		return NullLogicalType{}
 	case l.IsSetJSON():
@@ -344,7 +344,7 @@ func NewDecimalLogicalType(precision int32, scale int32) LogicalType {
 	if scale < 0 || scale > precision {
 		panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type")
 	}
-	return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}}
+	return DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}}
 }
 
 // DecimalLogicalType is used to represent a decimal value of a given
@@ -405,7 +405,7 @@ func (t DecimalLogicalType) toThrift() *format.LogicalType {
 }
 
 func (t DecimalLogicalType) Equals(rhs LogicalType) bool {
-	other, ok := rhs.(*DecimalLogicalType)
+	other, ok := rhs.(DecimalLogicalType)
 	if !ok {
 		return false
 	}
@@ -509,7 +509,7 @@ func createTimeUnit(unit TimeUnitType) *format.TimeUnit {
 
 // NewTimeLogicalType returns a time type of the given unit.
 func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
-	return &TimeLogicalType{typ: &format.TimeType{
+	return TimeLogicalType{typ: &format.TimeType{
 		IsAdjustedToUTC: isAdjustedToUTC,
 		Unit:            createTimeUnit(unit),
 	}}
@@ -584,7 +584,7 @@ func (t TimeLogicalType) toThrift() *format.LogicalType {
 }
 
 func (t TimeLogicalType) Equals(rhs LogicalType) bool {
-	other, ok := rhs.(*TimeLogicalType)
+	other, ok := rhs.(TimeLogicalType)
 	if !ok {
 		return false
 	}
@@ -595,7 +595,7 @@ func (t TimeLogicalType) Equals(rhs LogicalType) bool {
 // NewTimestampLogicalType returns a logical timestamp type with "forceConverted"
 // set to false
 func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
-	return &TimestampLogicalType{
+	return TimestampLogicalType{
 		typ: &format.TimestampType{
 			IsAdjustedToUTC: isAdjustedToUTC,
 			Unit:            createTimeUnit(unit),
@@ -608,7 +608,7 @@ func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalTyp
 // NewTimestampLogicalTypeForce returns a timestamp logical type with
 // "forceConverted" set to true
 func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
-	return &TimestampLogicalType{
+	return TimestampLogicalType{
 		typ: &format.TimestampType{
 			IsAdjustedToUTC: isAdjustedToUTC,
 			Unit:            createTimeUnit(unit),
@@ -654,14 +654,14 @@ func WithTSFromConverted() TimestampOpt {
 //
 // TimestampType Unit defaults to milliseconds (TimeUnitMillis)
 func NewTimestampLogicalTypeWithOpts(opts ...TimestampOpt) LogicalType {
-	ts := &TimestampLogicalType{
+	ts := TimestampLogicalType{
 		typ: &format.TimestampType{
 			Unit: createTimeUnit(TimeUnitMillis), // default to milliseconds
 		},
 	}
 
 	for _, o := range opts {
-		o(ts)
+		o(&ts)
 	}
 
 	return ts
@@ -760,7 +760,7 @@ func (t TimestampLogicalType) toThrift() *format.LogicalType {
 }
 
 func (t TimestampLogicalType) Equals(rhs LogicalType) bool {
-	other, ok := rhs.(*TimestampLogicalType)
+	other, ok := rhs.(TimestampLogicalType)
 	if !ok {
 		return false
 	}
@@ -778,7 +778,7 @@ func NewIntLogicalType(bitWidth int8, signed bool) LogicalType {
 	default:
 		panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type")
 	}
-	return &IntLogicalType{
+	return IntLogicalType{
 		typ: &format.IntType{
 			BitWidth: bitWidth,
 			IsSigned: signed,
@@ -864,7 +864,7 @@ func (t IntLogicalType) toThrift() *format.LogicalType {
 }
 
 func (t IntLogicalType) Equals(rhs LogicalType) bool {
-	other, ok := rhs.(*IntLogicalType)
+	other, ok := rhs.(IntLogicalType)
 	if !ok {
 		return false
 	}
diff --git a/go/parquet/schema/logical_types_test.go b/go/parquet/schema/logical_types_test.go
index e33925966e178..395d1504182fe 100644
--- a/go/parquet/schema/logical_types_test.go
+++ b/go/parquet/schema/logical_types_test.go
@@ -38,18 +38,18 @@ func TestConvertedLogicalEquivalences(t *testing.T) {
 		{"list", schema.ConvertedTypes.List, schema.NewListLogicalType(), schema.NewListLogicalType()},
 		{"enum", schema.ConvertedTypes.Enum, schema.EnumLogicalType{}, schema.EnumLogicalType{}},
 		{"date", schema.ConvertedTypes.Date, schema.DateLogicalType{}, schema.DateLogicalType{}},
-		{"timemilli", schema.ConvertedTypes.TimeMillis, schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), &schema.TimeLogicalType{}},
-		{"timemicro", schema.ConvertedTypes.TimeMicros, schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), &schema.TimeLogicalType{}},
-		{"timestampmilli", schema.ConvertedTypes.TimestampMillis, schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), &schema.TimestampLogicalType{}},
-		{"timestampmicro", schema.ConvertedTypes.TimestampMicros, schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), &schema.TimestampLogicalType{}},
-		{"uint8", schema.ConvertedTypes.Uint8, schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
-		{"uint16", schema.ConvertedTypes.Uint16, schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
-		{"uint32", schema.ConvertedTypes.Uint32, schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
-		{"uint64", schema.ConvertedTypes.Uint64, schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
-		{"int8", schema.ConvertedTypes.Int8, schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
-		{"int16", schema.ConvertedTypes.Int16, schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
-		{"int32", schema.ConvertedTypes.Int32, schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
-		{"int64", schema.ConvertedTypes.Int64, schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
+		{"timemilli", schema.ConvertedTypes.TimeMillis, schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), schema.TimeLogicalType{}},
+		{"timemicro", schema.ConvertedTypes.TimeMicros, schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), schema.TimeLogicalType{}},
+		{"timestampmilli", schema.ConvertedTypes.TimestampMillis, schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), schema.TimestampLogicalType{}},
+		{"timestampmicro", schema.ConvertedTypes.TimestampMicros, schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), schema.TimestampLogicalType{}},
+		{"uint8", schema.ConvertedTypes.Uint8, schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), schema.IntLogicalType{}},
+		{"uint16", schema.ConvertedTypes.Uint16, schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), schema.IntLogicalType{}},
+		{"uint32", schema.ConvertedTypes.Uint32, schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), schema.IntLogicalType{}},
+		{"uint64", schema.ConvertedTypes.Uint64, schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), schema.IntLogicalType{}},
+		{"int8", schema.ConvertedTypes.Int8, schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), schema.IntLogicalType{}},
+		{"int16", schema.ConvertedTypes.Int16, schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), schema.IntLogicalType{}},
+		{"int32", schema.ConvertedTypes.Int32, schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), schema.IntLogicalType{}},
+		{"int64", schema.ConvertedTypes.Int64, schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), schema.IntLogicalType{}},
 		{"json", schema.ConvertedTypes.JSON, schema.JSONLogicalType{}, schema.JSONLogicalType{}},
 		{"bson", schema.ConvertedTypes.BSON, schema.BSONLogicalType{}, schema.BSONLogicalType{}},
 		{"interval", schema.ConvertedTypes.Interval, schema.IntervalLogicalType{}, schema.IntervalLogicalType{}},
@@ -72,8 +72,8 @@ func TestConvertedLogicalEquivalences(t *testing.T) {
 		fromMake := schema.NewDecimalLogicalType(10, 4)
 		assert.IsType(t, fromMake, fromConverted)
 		assert.True(t, fromConverted.Equals(fromMake))
-		assert.IsType(t, &schema.DecimalLogicalType{}, fromConverted)
-		assert.IsType(t, &schema.DecimalLogicalType{}, fromMake)
+		assert.IsType(t, schema.DecimalLogicalType{}, fromConverted)
+		assert.IsType(t, schema.DecimalLogicalType{}, fromMake)
 		assert.True(t, schema.NewDecimalLogicalType(16, 0).Equals(schema.NewDecimalLogicalType(16, 0)))
 	})
 }
@@ -160,12 +160,12 @@ func TestNewTypeIncompatibility(t *testing.T) {
 		{"uuid", schema.UUIDLogicalType{}, schema.UUIDLogicalType{}},
 		{"float16", schema.Float16LogicalType{}, schema.Float16LogicalType{}},
 		{"null", schema.NullLogicalType{}, schema.NullLogicalType{}},
-		{"not-utc-time_milli", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMillis), &schema.TimeLogicalType{}},
-		{"not-utc-time-micro", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMicros), &schema.TimeLogicalType{}},
-		{"not-utc-time-nano", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitNanos), &schema.TimeLogicalType{}},
-		{"utc-time-nano", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), &schema.TimeLogicalType{}},
-		{"not-utc-timestamp-nano", schema.NewTimestampLogicalType(false /* adjustedToUTC */, schema.TimeUnitNanos), &schema.TimestampLogicalType{}},
-		{"utc-timestamp-nano", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), &schema.TimestampLogicalType{}},
+		{"not-utc-time_milli", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMillis), schema.TimeLogicalType{}},
+		{"not-utc-time-micro", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMicros), schema.TimeLogicalType{}},
+		{"not-utc-time-nano", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitNanos), schema.TimeLogicalType{}},
+		{"utc-time-nano", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), schema.TimeLogicalType{}},
+		{"not-utc-timestamp-nano", schema.NewTimestampLogicalType(false /* adjustedToUTC */, schema.TimeUnitNanos), schema.TimestampLogicalType{}},
+		{"utc-timestamp-nano", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), schema.TimestampLogicalType{}},
 	}
 
 	for _, tt := range tests {
diff --git a/go/parquet/schema/schema_element_test.go b/go/parquet/schema/schema_element_test.go
index 7da55ce93abe6..e427ba6485e64 100644
--- a/go/parquet/schema/schema_element_test.go
+++ b/go/parquet/schema/schema_element_test.go
@@ -192,7 +192,7 @@ func (s *SchemaElementConstructionSuite) TestSimple() {
 
 func (s *SchemaElementConstructionSuite) reconstructDecimal(c schemaElementConstructArgs) *decimalSchemaElementConstruction {
 	ret := s.reconstruct(c)
-	dec := c.logical.(*DecimalLogicalType)
+	dec := c.logical.(DecimalLogicalType)
 	return &decimalSchemaElementConstruction{*ret, int(dec.Precision()), int(dec.Scale())}
 }
 
@@ -359,7 +359,7 @@ func (s *SchemaElementConstructionSuite) TestTemporal() {
 
 func (s *SchemaElementConstructionSuite) reconstructInteger(c schemaElementConstructArgs) *intSchemaElementConstruction {
 	base := s.reconstruct(c)
-	l := c.logical.(*IntLogicalType)
+	l := c.logical.(IntLogicalType)
 	return &intSchemaElementConstruction{
 		*base,
 		l.BitWidth(),

From 82ecf3e6ed8cb58a08d600041617ce85c9bdb7c1 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 22 Aug 2024 22:57:14 +0200
Subject: [PATCH 07/63] MINOR: [CI][C++][Python] Fix Cuda builds on git main
 (#43789)

On the Cuda self-hosted runners, we need to use legacy `docker-compose` on all Archery Docker invocations, including the "image push" step. This is because the Docker client version on those runners is too old to accept the `--file` option to the `compose` subcommand.

This is a followup to https://github.com/apache/arrow/pull/43586 . The image push step cannot easily be verified in a PR, hence this second PR.

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/tasks/docker-tests/github.cuda.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dev/tasks/docker-tests/github.cuda.yml b/dev/tasks/docker-tests/github.cuda.yml
index 9c7adf53a6f70..8c04da8a91a4f 100644
--- a/dev/tasks/docker-tests/github.cuda.yml
+++ b/dev/tasks/docker-tests/github.cuda.yml
@@ -26,6 +26,8 @@ jobs:
     runs-on: ['self-hosted', 'cuda']
 {{ macros.github_set_env(env) }}
     timeout-minutes: {{ timeout|default(60) }}
+    env:
+      ARCHERY_USE_LEGACY_DOCKER_COMPOSE: 1
     steps:
       {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }}
       # python 3.8 is installed on the runner, no need to install
@@ -34,7 +36,6 @@ jobs:
       - name: Execute Docker Build
         shell: bash
         env:
-          ARCHERY_USE_LEGACY_DOCKER_COMPOSE: 1
         {{ macros.github_set_sccache_envvars()|indent(8) }}
         run: |
           archery docker run \

From bad064f705ec9fc72efac2d13a1fc3fac6d3d137 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 22 Aug 2024 14:08:26 -0700
Subject: [PATCH 08/63] MINOR: [C++] Ensure setting the default
 CMAKE_BUILD_TYPE (#43794)

### Rationale for this change

The current logic for detecting whether `CMAKE_BUILD_TYPE` is set is incorrect. That variable is never fully undefined; by default, when no build type is specified, it is set to the empty string. Therefore, the condition that must be checked is not whether the variable is defined, but whether it evaluates to a truthy value (i.e. is a non-empty string).

I consider this a minor change so I have not opened an associated issue.

### What changes are included in this PR?

This PR changes `if(NOT DEFINED CMAKE_BUILD_TYPE)` to `if(NOT CMAKE_BUILD_TYPE)`.

### Are these changes tested?

Since this fixes a particular CMake build scenario I am not sure if a test is merited, or where one would be added.

### Are there any user-facing changes?

No.

Authored-by: Vyas Ramasubramani <vyasr@nvidia.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/CMakeLists.txt                        | 2 +-
 cpp/examples/minimal_build/CMakeLists.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a1e3138da9e0b..5ead9e4b063cd 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -84,7 +84,7 @@ set(ARROW_VERSION "18.0.0-SNAPSHOT")
 string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
 
 # if no build type is specified, default to release builds
-if(NOT DEFINED CMAKE_BUILD_TYPE)
+if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE
       Release
       CACHE STRING "Choose the type of build.")
diff --git a/cpp/examples/minimal_build/CMakeLists.txt b/cpp/examples/minimal_build/CMakeLists.txt
index b4a7cde938c87..95dad34221add 100644
--- a/cpp/examples/minimal_build/CMakeLists.txt
+++ b/cpp/examples/minimal_build/CMakeLists.txt
@@ -30,7 +30,7 @@ endif()
 # We require a C++17 compliant compiler
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
-if(NOT DEFINED CMAKE_BUILD_TYPE)
+if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE Release)
 endif()
 

From 53b15b61691dde1ea86e14b7a2216fa0a26f8054 Mon Sep 17 00:00:00 2001
From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com>
Date: Fri, 23 Aug 2024 16:17:29 -0400
Subject: [PATCH 09/63] MINOR: [Go] Fix Flakey
 TestRowsPrematureCloseDuringNextLoop Test (#43804)

### Rationale for this change

Fixes a race condition in rows initialization that has been causing intermittent test failures.

### What changes are included in this PR?

Split query and init context. Update test to check for failure _after_ reading rows.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.

Authored-by: Joel Lubinitsky <joellubi@gmail.com>
Signed-off-by: Joel Lubinitsky <joellubi@gmail.com>
---
 go/arrow/flight/flightsql/driver/driver.go      | 10 ++++++----
 go/arrow/flight/flightsql/driver/driver_test.go |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go
index 0f2b02deaca7c..0513fe1ecd346 100644
--- a/go/arrow/flight/flightsql/driver/driver.go
+++ b/go/arrow/flight/flightsql/driver/driver.go
@@ -266,13 +266,14 @@ func (s *Stmt) QueryContext(ctx context.Context, args []driver.NamedValue) (driv
 		return nil, err
 	}
 
+	execCtx := ctx
 	if _, set := ctx.Deadline(); !set && s.timeout > 0 {
 		var cancel context.CancelFunc
-		ctx, cancel = context.WithTimeout(ctx, s.timeout)
+		execCtx, cancel = context.WithTimeout(ctx, s.timeout)
 		defer cancel()
 	}
 
-	info, err := s.stmt.Execute(ctx)
+	info, err := s.stmt.Execute(execCtx)
 	if err != nil {
 		return nil, err
 	}
@@ -497,13 +498,14 @@ func (c *Connection) QueryContext(ctx context.Context, query string, args []driv
 		return nil, driver.ErrSkip
 	}
 
+	execCtx := ctx
 	if _, set := ctx.Deadline(); !set && c.timeout > 0 {
 		var cancel context.CancelFunc
-		ctx, cancel = context.WithTimeout(ctx, c.timeout)
+		execCtx, cancel = context.WithTimeout(ctx, c.timeout)
 		defer cancel()
 	}
 
-	info, err := c.client.Execute(ctx, query)
+	info, err := c.client.Execute(execCtx, query)
 	if err != nil {
 		return nil, err
 	}
diff --git a/go/arrow/flight/flightsql/driver/driver_test.go b/go/arrow/flight/flightsql/driver/driver_test.go
index e5060ccbe33d0..c00dfe3c5d9a0 100644
--- a/go/arrow/flight/flightsql/driver/driver_test.go
+++ b/go/arrow/flight/flightsql/driver/driver_test.go
@@ -626,7 +626,6 @@ func (s *SqlTestSuite) TestRowsPrematureCloseDuringNextLoop() {
 	rows, err := db.QueryContext(context.TODO(), sqlSelectAll)
 	require.NoError(t, err)
 	require.NotNil(t, rows)
-	require.NoError(t, rows.Err())
 
 	const closeAfterNRows = 10
 	var (
@@ -645,6 +644,7 @@ func (s *SqlTestSuite) TestRowsPrematureCloseDuringNextLoop() {
 			require.NoError(t, rows.Close())
 		}
 	}
+	require.NoError(t, rows.Err())
 
 	require.Equal(t, closeAfterNRows, i)
 

From cb645a1b27dd66fddb88458c939e2851f9dadf35 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sat, 24 Aug 2024 06:08:18 +0900
Subject: [PATCH 10/63] GH-43802: [GLib] Add `GAFlightRecordBatchWriter`
 (#43803)

### Rationale for this change

This is needed to implement `DoPut`.

### What changes are included in this PR?

We can't add tests for it because it's an abstract class.

I'm not sure whether `is_owner` is needed here as it is for
`GAFlightRecordBatchReader`. `is_owner` may be removed later if we find that it's unnecessary.

### Are these changes tested?

No.

### Are there any user-facing changes?

Yes.

`GAFlightRecordBatchWriter` is a new public API.
* GitHub Issue: #43802

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/arrow-flight-glib/common.cpp | 198 ++++++++++++++++++++++++++--
 c_glib/arrow-flight-glib/common.h   |  32 +++++
 c_glib/arrow-flight-glib/common.hpp |   4 +
 3 files changed, 224 insertions(+), 10 deletions(-)

diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp
index efc544f10cf66..f7eea08c264b3 100644
--- a/c_glib/arrow-flight-glib/common.cpp
+++ b/c_glib/arrow-flight-glib/common.cpp
@@ -48,7 +48,11 @@ G_BEGIN_DECLS
  *
  * #GAFlightStreamChunk is a class for a chunk in stream.
  *
- * #GAFlightRecordBatchReader is a class for reading record batches.
+ * #GAFlightRecordBatchReader is an abstract class for reading record
+ * batches with metadata.
+ *
+ * #GAFlightRecordBatchWriter is an abstract class for
+ * writing record batches with metadata.
  *
  * Since: 5.0.0
  */
@@ -1172,13 +1176,13 @@ typedef struct GAFlightRecordBatchReaderPrivate_
 } GAFlightRecordBatchReaderPrivate;
 
 enum {
-  PROP_READER = 1,
-  PROP_IS_OWNER,
+  PROP_RECORD_BATCH_READER_READER = 1,
+  PROP_RECORD_BATCH_READER_IS_OWNER,
 };
 
-G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader,
-                           gaflight_record_batch_reader,
-                           G_TYPE_OBJECT)
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader,
+                                    gaflight_record_batch_reader,
+                                    G_TYPE_OBJECT)
 
 #define GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(obj)                                    \
   static_cast<GAFlightRecordBatchReaderPrivate *>(                                       \
@@ -1204,11 +1208,11 @@ gaflight_record_batch_reader_set_property(GObject *object,
   auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object);
 
   switch (prop_id) {
-  case PROP_READER:
+  case PROP_RECORD_BATCH_READER_READER:
     priv->reader =
       static_cast<arrow::flight::MetadataRecordBatchReader *>(g_value_get_pointer(value));
     break;
-  case PROP_IS_OWNER:
+  case PROP_RECORD_BATCH_READER_IS_OWNER:
     priv->is_owner = g_value_get_boolean(value);
     break;
   default:
@@ -1236,7 +1240,7 @@ gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass)
     nullptr,
     nullptr,
     static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_READER, spec);
+  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER_READER, spec);
 
   spec = g_param_spec_boolean(
     "is-owner",
@@ -1244,7 +1248,7 @@ gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass)
     nullptr,
     TRUE,
     static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_IS_OWNER, spec);
+  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER_IS_OWNER, spec);
 }
 
 /**
@@ -1296,6 +1300,173 @@ gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError
   }
 }
 
+typedef struct GAFlightRecordBatchWriterPrivate_
+{
+  arrow::flight::MetadataRecordBatchWriter *writer;
+  bool is_owner;
+} GAFlightRecordBatchWriterPrivate;
+
+enum {
+  PROP_RECORD_BATCH_WRITER_WRITER = 1,
+  PROP_RECORD_BATCH_WRITER_IS_OWNER,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightRecordBatchWriter,
+                                    gaflight_record_batch_writer,
+                                    GARROW_TYPE_RECORD_BATCH_WRITER)
+
+#define GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object)                                 \
+  static_cast<GAFlightRecordBatchWriterPrivate *>(                                       \
+    gaflight_record_batch_writer_get_instance_private(                                   \
+      GAFLIGHT_RECORD_BATCH_WRITER(object)))
+
+static void
+gaflight_record_batch_writer_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object);
+  if (priv->is_owner) {
+    delete priv->writer; // the raw writer is freed only when this wrapper owns it
+  }
+  G_OBJECT_CLASS(gaflight_record_batch_writer_parent_class)->finalize(object);
+}
+
+static void
+gaflight_record_batch_writer_set_property(GObject *object,
+                                          guint prop_id,
+                                          const GValue *value,
+                                          GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_RECORD_BATCH_WRITER_WRITER:
+    priv->writer =
+      static_cast<arrow::flight::MetadataRecordBatchWriter *>(g_value_get_pointer(value));
+    break;
+  case PROP_RECORD_BATCH_WRITER_IS_OWNER:
+    priv->is_owner = g_value_get_boolean(value);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_record_batch_writer_init(GAFlightRecordBatchWriter *object)
+{
+}
+
+static void
+gaflight_record_batch_writer_class_init(GAFlightRecordBatchWriterClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_record_batch_writer_finalize;
+  gobject_class->set_property = gaflight_record_batch_writer_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer(
+    "writer",
+    nullptr,
+    nullptr,
+    static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_WRITER_WRITER, spec);
+
+  spec = g_param_spec_boolean(
+    "is-owner",
+    nullptr,
+    nullptr,
+    TRUE,
+    static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_WRITER_IS_OWNER, spec);
+}
+
+/**
+ * gaflight_record_batch_writer_begin:
+ * @writer: A #GAFlightRecordBatchWriter.
+ * @schema: A #GArrowSchema.
+ * @options: (nullable): A #GArrowWriteOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Begins writing data with the given schema. Only used with
+ * `DoExchange`.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer,
+                                   GArrowSchema *schema,
+                                   GArrowWriteOptions *options,
+                                   GError **error)
+{
+  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  arrow::ipc::IpcWriteOptions arrow_write_options;
+  if (options) {
+    arrow_write_options = *garrow_write_options_get_raw(options);
+  } else {
+    arrow_write_options = arrow::ipc::IpcWriteOptions::Defaults();
+  }
+  return garrow::check(error,
+                       flight_writer->Begin(arrow_schema, arrow_write_options),
+                       "[flight-record-batch-writer][begin]");
+}
+
+/**
+ * gaflight_record_batch_writer_write_metadata:
+ * @writer: A #GAFlightRecordBatchWriter.
+ * @metadata: A #GArrowBuffer.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Write metadata.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
+                                            GArrowBuffer *metadata,
+                                            GError **error)
+{
+  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto arrow_metadata = garrow_buffer_get_raw(metadata);
+  return garrow::check(error,
+                       flight_writer->WriteMetadata(arrow_metadata),
+                       "[flight-record-batch-writer][write-metadata]");
+}
+
+/**
+ * gaflight_record_batch_writer_write:
+ * @writer: A #GAFlightRecordBatchWriter.
+ * @record_batch: A #GArrowRecordBatch.
+ * @metadata: (nullable): A #GArrowBuffer.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Write a record batch with optional metadata.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gaflight_record_batch_writer_write(GAFlightRecordBatchWriter *writer,
+                                   GArrowRecordBatch *record_batch,
+                                   GArrowBuffer *metadata,
+                                   GError **error)
+{
+  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  auto arrow_metadata = metadata ? garrow_buffer_get_raw(metadata) : nullptr;
+  return garrow::check(
+    error,
+    flight_writer->WriteWithMetadata(*arrow_record_batch, arrow_metadata),
+    "[flight-record-batch-writer][write]");
+}
+
 G_END_DECLS
 
 GAFlightCriteria *
@@ -1428,3 +1599,10 @@ gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader)
   auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(reader);
   return priv->reader;
 }
+
+arrow::flight::MetadataRecordBatchWriter *
+gaflight_record_batch_writer_get_raw(GAFlightRecordBatchWriter *writer)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(writer);
+  return priv->writer;
+}
diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h
index b1d89f79c357e..91c828caabb36 100644
--- a/c_glib/arrow-flight-glib/common.h
+++ b/c_glib/arrow-flight-glib/common.h
@@ -232,4 +232,36 @@ GAFLIGHT_AVAILABLE_IN_6_0
 GArrowTable *
 gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError **error);
 
+#define GAFLIGHT_TYPE_RECORD_BATCH_WRITER (gaflight_record_batch_writer_get_type())
+GAFLIGHT_AVAILABLE_IN_18_0
+G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchWriter,
+                         gaflight_record_batch_writer,
+                         GAFLIGHT,
+                         RECORD_BATCH_WRITER,
+                         GArrowRecordBatchWriter)
+struct _GAFlightRecordBatchWriterClass
+{
+  GArrowRecordBatchWriterClass parent_class;
+};
+
+GAFLIGHT_AVAILABLE_IN_18_0
+gboolean
+gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer,
+                                   GArrowSchema *schema,
+                                   GArrowWriteOptions *options,
+                                   GError **error);
+
+GAFLIGHT_AVAILABLE_IN_18_0
+gboolean
+gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
+                                            GArrowBuffer *metadata,
+                                            GError **error);
+
+GAFLIGHT_AVAILABLE_IN_18_0
+gboolean
+gaflight_record_batch_writer_write(GAFlightRecordBatchWriter *writer,
+                                   GArrowRecordBatch *record_batch,
+                                   GArrowBuffer *metadata,
+                                   GError **error);
+
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp
index db56fff579baf..ae5a7703397dd 100644
--- a/c_glib/arrow-flight-glib/common.hpp
+++ b/c_glib/arrow-flight-glib/common.hpp
@@ -79,3 +79,7 @@ gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk);
 GAFLIGHT_EXTERN
 arrow::flight::MetadataRecordBatchReader *
 gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader);
+
+GAFLIGHT_EXTERN
+arrow::flight::MetadataRecordBatchWriter *
+gaflight_record_batch_writer_get_raw(GAFlightRecordBatchWriter *writer);

From 146b4e9669071984c883ec5791676638014bd655 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sat, 24 Aug 2024 06:22:26 +0900
Subject: [PATCH 11/63] GH-43743: [CI][Docs] Ensure creating build directory
 (#43744)

### Rationale for this change

It's used as a volume. If it doesn't exist, `docker compose` reports an error:

    Error response from daemon: invalid mount config for type "bind": bind source path does not exist: /home/runner/work/crossbow/crossbow/build/

### What changes are included in this PR?

* Create build directory
* Move required `-v $PWD/build/:/build/` to `docs/github.linux.yml`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* GitHub Issue: #43743

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/tasks/docs/github.linux.yml | 4 +++-
 dev/tasks/tasks.yml             | 4 +---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dev/tasks/docs/github.linux.yml b/dev/tasks/docs/github.linux.yml
index 8ab8a593c3ef3..5863d68d2c828 100644
--- a/dev/tasks/docs/github.linux.yml
+++ b/dev/tasks/docs/github.linux.yml
@@ -34,8 +34,10 @@ jobs:
         env:
           ARROW_JAVA_SKIP_GIT_PLUGIN: true
         run: |
+          mkdir -p build
           archery docker run \
             -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \
+            -v $PWD/build/:/build/ \
             {{ flags|default("") }} \
             {{ image }} \
             {{ command|default("") }}
@@ -45,7 +47,7 @@ jobs:
           ref: {{ default_branch|default("main") }}
           path: crossbow
           fetch-depth: 1
-      {% if  publish %}
+      {% if publish %}
       - name: Prepare Docs Preview
         run: |
           # build files are created by the docker user
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 60114d6930878..cae34c3231381 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1487,7 +1487,7 @@ tasks:
       image: debian-go
   {% endfor %}
 
-  # be sure to update binary-task.rb when upgrading ubuntu
+  # be sure to update binary-task.rb when upgrading Debian
   test-debian-12-docs:
     ci: github
     template: docs/github.linux.yml
@@ -1495,7 +1495,6 @@ tasks:
       env:
         JDK: 17
       pr_number: Unset
-      flags: "-v $PWD/build/:/build/"
       image: debian-docs
       publish: false
     artifacts:
@@ -1621,6 +1620,5 @@ tasks:
       env:
         JDK: 17
       pr_number: Unset
-      flags: "-v $PWD/build/:/build/"
       image: debian-docs
       publish: true

From e61c105c73dfabb51d5afc972ff21cc5326b3d93 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon <vibhatha@users.noreply.github.com>
Date: Sat, 24 Aug 2024 07:07:09 +0530
Subject: [PATCH 12/63] GH-41584: [Java] ListView Implementation for C Data
 Interface (#43686)

### Rationale for this change

C Data Interface is missing `ListView` and `LargeListView` after recently merging core functionalities.

Also closes:

- [x] https://github.com/apache/arrow/issues/41585

### What changes are included in this PR?

This PR includes C Data interface related component additions to `ListView` and `LargeListView` along with the corresponding test cases.

### Are these changes tested?

Yes

### Are there any user-facing changes?

No
* GitHub Issue: #41584

Authored-by: Vibhatha Abeykoon <vibhatha@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 dev/archery/archery/integration/datagen.py    |   1 -
 .../arrow/c/BufferImportTypeVisitor.java      |  14 +-
 .../main/java/org/apache/arrow/c/Format.java  |   8 ++
 .../org/apache/arrow/c/RoundtripTest.java     |  42 ++++++
 java/c/src/test/python/integration_tests.py   |  47 ++++++
 .../BaseLargeRepeatedValueViewVector.java     |  29 ++--
 .../complex/BaseRepeatedValueViewVector.java  |  30 ++--
 .../vector/complex/LargeListViewVector.java   |  10 +-
 .../arrow/vector/complex/ListViewVector.java  |   6 +-
 .../arrow/vector/TestLargeListViewVector.java | 134 ++++++++++++++++++
 .../arrow/vector/TestListViewVector.java      | 132 +++++++++++++++++
 .../testing/ValueVectorDataPopulator.java     |  34 +++++
 12 files changed, 451 insertions(+), 36 deletions(-)

diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 47310c905a9ff..d395d26cb71d3 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1936,7 +1936,6 @@ def _temp_path():
 
         generate_list_view_case()
         .skip_tester('C#')     # Doesn't support large list views
-        .skip_tester('Java')
         .skip_tester('JS')
         .skip_tester('nanoarrow')
         .skip_tester('Rust'),
diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
index 633ecd43bd570..93fef6d7ca801 100644
--- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
+++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
@@ -47,7 +47,9 @@
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.complex.DenseUnionVector;
 import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.LargeListViewVector;
 import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.ListViewVector;
 import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.UnionVector;
 import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
@@ -400,13 +402,17 @@ public List<ArrowBuf> visit(ArrowType.Duration type) {
 
   @Override
   public List<ArrowBuf> visit(ArrowType.ListView type) {
-    throw new UnsupportedOperationException(
-        "Importing buffers for view type: " + type + " not supported");
+    return Arrays.asList(
+        maybeImportBitmap(type),
+        importFixedBytes(type, 1, ListViewVector.OFFSET_WIDTH),
+        importFixedBytes(type, 2, ListViewVector.SIZE_WIDTH));
   }
 
   @Override
   public List<ArrowBuf> visit(ArrowType.LargeListView type) {
-    throw new UnsupportedOperationException(
-        "Importing buffers for view type: " + type + " not supported");
+    return Arrays.asList(
+        maybeImportBitmap(type),
+        importFixedBytes(type, 1, LargeListViewVector.OFFSET_WIDTH),
+        importFixedBytes(type, 2, LargeListViewVector.SIZE_WIDTH));
   }
 }
diff --git a/java/c/src/main/java/org/apache/arrow/c/Format.java b/java/c/src/main/java/org/apache/arrow/c/Format.java
index aff51e7b734ab..f77a555d18481 100644
--- a/java/c/src/main/java/org/apache/arrow/c/Format.java
+++ b/java/c/src/main/java/org/apache/arrow/c/Format.java
@@ -229,6 +229,10 @@ static String asString(ArrowType arrowType) {
         return "vu";
       case BinaryView:
         return "vz";
+      case ListView:
+        return "+vl";
+      case LargeListView:
+        return "+vL";
       case NONE:
         throw new IllegalArgumentException("Arrow type ID is NONE");
       default:
@@ -313,6 +317,10 @@ static ArrowType asType(String format, long flags)
         return new ArrowType.Utf8View();
       case "vz":
         return new ArrowType.BinaryView();
+      case "+vl":
+        return new ArrowType.ListView();
+      case "+vL":
+        return new ArrowType.LargeListView();
       default:
         String[] parts = format.split(":", 2);
         if (parts.length == 2) {
diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
index 6591d1f730990..18b2e94adde47 100644
--- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
@@ -84,7 +84,9 @@
 import org.apache.arrow.vector.compare.VectorEqualsVisitor;
 import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.LargeListViewVector;
 import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.ListViewVector;
 import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.complex.UnionVector;
@@ -683,6 +685,46 @@ public void testFixedSizeListVector() {
     }
   }
 
+  @Test
+  public void testListViewVector() {
+    try (final ListViewVector vector = ListViewVector.empty("v", allocator)) {
+      setVector(
+          vector,
+          Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()),
+          Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()),
+          new ArrayList<Integer>());
+      assertTrue(roundtrip(vector, ListViewVector.class));
+    }
+  }
+
+  @Test
+  public void testEmptyListViewVector() {
+    try (final ListViewVector vector = ListViewVector.empty("v", allocator)) {
+      setVector(vector, new ArrayList<Integer>());
+      assertTrue(roundtrip(vector, ListViewVector.class));
+    }
+  }
+
+  @Test
+  public void testLargeListViewVector() {
+    try (final LargeListViewVector vector = LargeListViewVector.empty("v", allocator)) {
+      setVector(
+          vector,
+          Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()),
+          Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()),
+          new ArrayList<Integer>());
+      assertTrue(roundtrip(vector, LargeListViewVector.class));
+    }
+  }
+
+  @Test
+  public void testEmptyLargeListViewVector() {
+    try (final LargeListViewVector vector = LargeListViewVector.empty("v", allocator)) {
+      setVector(vector, new ArrayList<Integer>());
+      assertTrue(roundtrip(vector, LargeListViewVector.class));
+    }
+  }
+
   @Test
   public void testMapVector() {
     int count = 5;
diff --git a/java/c/src/test/python/integration_tests.py b/java/c/src/test/python/integration_tests.py
index ab2ee1742f366..b0a86e9c66e59 100644
--- a/java/c/src/test/python/integration_tests.py
+++ b/java/c/src/test/python/integration_tests.py
@@ -352,6 +352,53 @@ def test_reader_complex_roundtrip(self):
         ]
         self.round_trip_reader(schema, data)
 
+    def test_listview_array(self):
+        self.round_trip_array(lambda: pa.array(
+            [[], [0], [1, 2], [4, 5, 6]], pa.list_view(pa.int64())
+            # disabled check_metadata since in Java API the listview
+            # internal field name ("item") is not preserved
+            # during round trips (it becomes "$data$").
+        ), check_metadata=False)
+
+    def test_empty_listview_array(self):
+        with pa.BufferOutputStream() as bos:
+            schema = pa.schema([pa.field("f0", pa.list_view(pa.int32()), True)])
+            with ipc.new_stream(bos, schema) as writer:
+                src = pa.RecordBatch.from_arrays(
+                    [pa.array([[]], pa.list_view(pa.int32()))], schema=schema)
+                writer.write(src)
+        data_bytes = bos.getvalue()
+
+        def recreate_batch():
+            with pa.input_stream(data_bytes) as ios:
+                with ipc.open_stream(ios) as reader:
+                    return reader.read_next_batch()
+
+        self.round_trip_record_batch(recreate_batch)
+
+    def test_largelistview_array(self):
+        self.round_trip_array(lambda: pa.array(
+            [[], [0], [1, 2], [4, 5, 6]], pa.large_list_view(pa.int64())
+            # disabled check_metadata since in Java API the listview
+            # internal field name ("item") is not preserved
+            # during round trips (it becomes "$data$").
+        ), check_metadata=False)
+
+    def test_empty_largelistview_array(self):
+        with pa.BufferOutputStream() as bos:
+            schema = pa.schema([pa.field("f0", pa.large_list_view(pa.int32()), True)])
+            with ipc.new_stream(bos, schema) as writer:
+                src = pa.RecordBatch.from_arrays(
+                    [pa.array([[]], pa.large_list_view(pa.int32()))], schema=schema)
+                writer.write(src)
+        data_bytes = bos.getvalue()
+
+        def recreate_batch():
+            with pa.input_stream(data_bytes) as ios:
+                with ipc.open_stream(ios) as reader:
+                    return reader.read_next_batch()
+
+        self.round_trip_record_batch(recreate_batch)
 
 if __name__ == '__main__':
     unittest.main(verbosity=2)
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java
index f643306cfdcff..12edd6557bd9c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java
@@ -305,38 +305,43 @@ public void setValueCount(int valueCount) {
     while (valueCount > getOffsetBufferValueCapacity()) {
       reallocateBuffers();
     }
-    final int childValueCount = valueCount == 0 ? 0 : getLengthOfChildVector();
+    final int childValueCount = valueCount == 0 ? 0 : getMaxViewEndChildVector();
     vector.setValueCount(childValueCount);
   }
 
-  protected int getLengthOfChildVector() {
+  /**
+   * Get the end of the child vector, i.e. one past the last child element referenced by any view.
+   * This is computed as max_i(offsets[i] + sizes[i]) over all views in this vector.
+   *
+   * @return the end of the child vector.
+   */
+  protected int getMaxViewEndChildVector() {
     int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
-    int minOffset = offsetBuffer.getInt(0);
     for (int i = 0; i < valueCount; i++) {
       int currentOffset = offsetBuffer.getInt((long) i * OFFSET_WIDTH);
       int currentSize = sizeBuffer.getInt((long) i * SIZE_WIDTH);
       int currentSum = currentOffset + currentSize;
-
       maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
-      minOffset = Math.min(minOffset, currentOffset);
     }
 
-    return maxOffsetSizeSum - minOffset;
+    return maxOffsetSizeSum;
   }
 
-  protected int getLengthOfChildVectorByIndex(int index) {
+  /**
+   * Get the maximum of offsets[i] + sizes[i] over the views that precede {@code index}.
+   *
+   * @return the end of the child vector by index
+   */
+  protected int getMaxViewEndChildVectorByIndex(int index) {
     int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
-    int minOffset = offsetBuffer.getInt(0);
     for (int i = 0; i < index; i++) {
       int currentOffset = offsetBuffer.getInt((long) i * OFFSET_WIDTH);
       int currentSize = sizeBuffer.getInt((long) i * SIZE_WIDTH);
       int currentSum = currentOffset + currentSize;
-
       maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
-      minOffset = Math.min(minOffset, currentOffset);
     }
 
-    return maxOffsetSizeSum - minOffset;
+    return maxOffsetSizeSum;
   }
 
   /**
@@ -390,7 +395,7 @@ public int startNewValue(int index) {
     }
 
     if (index > 0) {
-      final int prevOffset = getLengthOfChildVectorByIndex(index);
+      final int prevOffset = getMaxViewEndChildVectorByIndex(index);
       offsetBuffer.setInt((long) index * OFFSET_WIDTH, prevOffset);
     }
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java
index 031cc8037bb8b..e6213316b55a3 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java
@@ -304,38 +304,44 @@ public void setValueCount(int valueCount) {
     while (valueCount > getOffsetBufferValueCapacity()) {
       reallocateBuffers();
     }
-    final int childValueCount = valueCount == 0 ? 0 : getLengthOfChildVector();
+    final int childValueCount = valueCount == 0 ? 0 : getMaxViewEndChildVector();
     vector.setValueCount(childValueCount);
   }
 
-  protected int getLengthOfChildVector() {
+  /**
+   * Get the end of the child vector, i.e. one past the last child element referenced by any view.
+   * This is computed as max_i(offsets[i] + sizes[i]) over all views in this vector.
+   *
+   * @return the end of the child vector.
+   */
+  protected int getMaxViewEndChildVector() {
     int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
-    int minOffset = offsetBuffer.getInt(0);
     for (int i = 0; i < valueCount; i++) {
       int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
       int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH);
       int currentSum = currentOffset + currentSize;
-
       maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
-      minOffset = Math.min(minOffset, currentOffset);
     }
 
-    return maxOffsetSizeSum - minOffset;
+    return maxOffsetSizeSum;
   }
 
-  protected int getLengthOfChildVectorByIndex(int index) {
+  /**
+   * Get the maximum of offsets[i] + sizes[i] over the views that precede {@code index}.
+   *
+   * @return the end of the child vector by index
+   */
+  protected int getMaxViewEndChildVectorByIndex(int index) {
     int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0);
-    int minOffset = offsetBuffer.getInt(0);
+    // seed with the end of view 0; the loop then folds in every view before index
     for (int i = 0; i < index; i++) {
       int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
       int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH);
       int currentSum = currentOffset + currentSize;
-
       maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum);
-      minOffset = Math.min(minOffset, currentOffset);
     }
 
-    return maxOffsetSizeSum - minOffset;
+    return maxOffsetSizeSum;
   }
 
   /**
@@ -389,7 +395,7 @@ public int startNewValue(int index) {
     }
 
     if (index > 0) {
-      final int prevOffset = getLengthOfChildVectorByIndex(index);
+      final int prevOffset = getMaxViewEndChildVectorByIndex(index);
       offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
     }
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java
index 2c61f799a4cf9..84c6f03edb25d 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java
@@ -250,7 +250,9 @@ public List<ArrowBuf> getFieldBuffers() {
    */
   @Override
   public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr, long nullValue) {
-    throw new UnsupportedOperationException("exportCDataBuffers Not implemented yet");
+    exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true);
+    exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true);
+    exportBuffer(sizeBuffer, buffers, buffersPtr, nullValue, true);
   }
 
   @Override
@@ -851,7 +853,7 @@ public int startNewValue(int index) {
     }
 
     if (index > 0) {
-      final int prevOffset = getLengthOfChildVectorByIndex(index);
+      final int prevOffset = getMaxViewEndChildVectorByIndex(index);
       offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
     }
 
@@ -943,7 +945,7 @@ public void setValueCount(int valueCount) {
       }
     }
     /* valueCount for the data vector is the current end offset */
-    final long childValueCount = (valueCount == 0) ? 0 : getLengthOfChildVector();
+    final long childValueCount = (valueCount == 0) ? 0 : getMaxViewEndChildVector();
     /* set the value count of data vector and this will take care of
      * checking whether data buffer needs to be reallocated.
      * TODO: revisit when 64-bit vectors are supported
@@ -1001,7 +1003,7 @@ public double getDensity() {
     if (valueCount == 0) {
       return 0.0D;
     }
-    final double totalListSize = getLengthOfChildVector();
+    final double totalListSize = getMaxViewEndChildVector();
     return totalListSize / valueCount;
   }
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
index 7f6d92f3be9c8..9b4e6b4c0cd4a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java
@@ -858,7 +858,7 @@ public int startNewValue(int index) {
     }
 
     if (index > 0) {
-      final int prevOffset = getLengthOfChildVectorByIndex(index);
+      final int prevOffset = getMaxViewEndChildVectorByIndex(index);
       offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
     }
 
@@ -942,7 +942,7 @@ public void setValueCount(int valueCount) {
       }
     }
     /* valueCount for the data vector is the current end offset */
-    final int childValueCount = (valueCount == 0) ? 0 : getLengthOfChildVector();
+    final int childValueCount = (valueCount == 0) ? 0 : getMaxViewEndChildVector();
     /* set the value count of data vector and this will take care of
      * checking whether data buffer needs to be reallocated.
      */
@@ -1005,7 +1005,7 @@ public double getDensity() {
     if (valueCount == 0) {
       return 0.0D;
     }
-    final double totalListSize = getLengthOfChildVector();
+    final double totalListSize = getMaxViewEndChildVector();
     return totalListSize / valueCount;
   }
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java
index 2ed8d4d7005ea..26e7bb4a0d3b2 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java
@@ -2095,6 +2095,140 @@ public void testOutOfOrderOffsetSplitAndTransfer() {
     }
   }
 
+  @Test
+  public void testRangeChildVector1() {
+    /*
+     * Overlapping ranges, smallest offset is 0
+     * offsets: [0, 2]
+     * sizes: [4, 1]
+     * values: [0, 1, 2, 3, 4, 5, 6, 7]
+     *
+     * vector: [[0, 1, 2, 3], [2]]
+     * */
+    try (LargeListViewVector largeListViewVector =
+        LargeListViewVector.empty("largelistview", allocator)) {
+      // Allocate buffers in listViewVector by calling `allocateNew` method.
+      largeListViewVector.allocateNew();
+
+      // Initialize the child vector using `initializeChildrenFromFields` method.
+
+      FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null);
+      Field field = new Field("child-vector", fieldType, null);
+      largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+      // Set values in the child vector.
+      FieldVector fieldVector = largeListViewVector.getDataVector();
+      fieldVector.clear();
+
+      IntVector childVector = (IntVector) fieldVector;
+
+      childVector.allocateNew(8);
+
+      childVector.set(0, 0);
+      childVector.set(1, 1);
+      childVector.set(2, 2);
+      childVector.set(3, 3);
+      childVector.set(4, 4);
+      childVector.set(5, 5);
+      childVector.set(6, 6);
+      childVector.set(7, 7);
+
+      childVector.setValueCount(8);
+
+      // Set validity, offset and size buffers using `setValidity`,
+      //  `setOffset` and `setSize` methods.
+      largeListViewVector.setValidity(0, 1);
+      largeListViewVector.setValidity(1, 1);
+
+      largeListViewVector.setOffset(0, 0);
+      largeListViewVector.setOffset(1, 2);
+
+      largeListViewVector.setSize(0, 4);
+      largeListViewVector.setSize(1, 1);
+
+      assertEquals(8, largeListViewVector.getDataVector().getValueCount());
+
+      largeListViewVector.setValueCount(2);
+      assertEquals(4, largeListViewVector.getDataVector().getValueCount());
+
+      IntVector childVector1 = (IntVector) largeListViewVector.getDataVector();
+      final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+      final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+      // yet the underneath buffer contains the original buffer
+      for (int i = 0; i < validityBuffer.capacity(); i++) {
+        assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+      }
+    }
+  }
+
+  @Test
+  public void testRangeChildVector2() {
+    /*
+     * Overlapping ranges, smallest offset is 1
+     * offsets: [1, 2]
+     * sizes: [3, 1]
+     * values: [0, 1, 2, 3, 4, 5, 6, 7]
+     *
+     * vector: [[1, 2, 3], [2]]
+     * */
+    try (LargeListViewVector largeListViewVector =
+        LargeListViewVector.empty("largelistview", allocator)) {
+      // Allocate buffers in listViewVector by calling `allocateNew` method.
+      largeListViewVector.allocateNew();
+
+      // Initialize the child vector using `initializeChildrenFromFields` method.
+
+      FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null);
+      Field field = new Field("child-vector", fieldType, null);
+      largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+      // Set values in the child vector.
+      FieldVector fieldVector = largeListViewVector.getDataVector();
+      fieldVector.clear();
+
+      IntVector childVector = (IntVector) fieldVector;
+
+      childVector.allocateNew(8);
+
+      childVector.set(0, 0);
+      childVector.set(1, 1);
+      childVector.set(2, 2);
+      childVector.set(3, 3);
+      childVector.set(4, 4);
+      childVector.set(5, 5);
+      childVector.set(6, 6);
+      childVector.set(7, 7);
+
+      childVector.setValueCount(8);
+
+      // Set validity, offset and size buffers using `setValidity`,
+      //  `setOffset` and `setSize` methods.
+      largeListViewVector.setValidity(0, 1);
+      largeListViewVector.setValidity(1, 1);
+
+      largeListViewVector.setOffset(0, 1);
+      largeListViewVector.setOffset(1, 2);
+
+      largeListViewVector.setSize(0, 3);
+      largeListViewVector.setSize(1, 1);
+
+      assertEquals(8, largeListViewVector.getDataVector().getValueCount());
+
+      largeListViewVector.setValueCount(2);
+      assertEquals(4, largeListViewVector.getDataVector().getValueCount());
+
+      IntVector childVector1 = (IntVector) largeListViewVector.getDataVector();
+      final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+      final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+      // yet the underneath buffer contains the original buffer
+      for (int i = 0; i < validityBuffer.capacity(); i++) {
+        assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+      }
+    }
+  }
+
   private void writeIntValues(UnionLargeListViewWriter writer, int[] values) {
     writer.startListView();
     for (int v : values) {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
index 4fa808c18aece..639585fc48d0a 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
@@ -2084,6 +2084,138 @@ public void testOutOfOrderOffsetSplitAndTransfer() {
     }
   }
 
+  @Test
+  public void testRangeChildVector1() {
+    /*
+     * Overlapping ranges, smallest offset is 0
+     * offsets: [0, 2]
+     * sizes: [4, 1]
+     * values: [0, 1, 2, 3, 4, 5, 6, 7]
+     *
+     * vector: [[0, 1, 2, 3], [2]]
+     * */
+    try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) {
+      // Allocate buffers in listViewVector by calling `allocateNew` method.
+      listViewVector.allocateNew();
+
+      // Initialize the child vector using `initializeChildrenFromFields` method.
+
+      FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null);
+      Field field = new Field("child-vector", fieldType, null);
+      listViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+      // Set values in the child vector.
+      FieldVector fieldVector = listViewVector.getDataVector();
+      fieldVector.clear();
+
+      IntVector childVector = (IntVector) fieldVector;
+
+      childVector.allocateNew(8);
+
+      childVector.set(0, 0);
+      childVector.set(1, 1);
+      childVector.set(2, 2);
+      childVector.set(3, 3);
+      childVector.set(4, 4);
+      childVector.set(5, 5);
+      childVector.set(6, 6);
+      childVector.set(7, 7);
+
+      childVector.setValueCount(8);
+
+      // Set validity, offset and size buffers using `setValidity`,
+      //  `setOffset` and `setSize` methods.
+      listViewVector.setValidity(0, 1);
+      listViewVector.setValidity(1, 1);
+
+      listViewVector.setOffset(0, 0);
+      listViewVector.setOffset(1, 2);
+
+      listViewVector.setSize(0, 4);
+      listViewVector.setSize(1, 1);
+
+      assertEquals(8, listViewVector.getDataVector().getValueCount());
+
+      listViewVector.setValueCount(2);
+      assertEquals(4, listViewVector.getDataVector().getValueCount());
+
+      IntVector childVector1 = (IntVector) listViewVector.getDataVector();
+      final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+      final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+      // yet the underneath buffer contains the original buffer
+      for (int i = 0; i < validityBuffer.capacity(); i++) {
+        assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+      }
+    }
+  }
+
+  @Test
+  public void testRangeChildVector2() {
+    /*
+     * Overlapping ranges, smallest offset is 1
+     * offsets: [1, 2]
+     * sizes: [3, 1]
+     * values: [0, 1, 2, 3, 4, 5, 6, 7]
+     *
+     * vector: [[1, 2, 3], [2]]
+     * */
+    try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) {
+      // Allocate buffers in listViewVector by calling `allocateNew` method.
+      listViewVector.allocateNew();
+
+      // Initialize the child vector using `initializeChildrenFromFields` method.
+
+      FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null);
+      Field field = new Field("child-vector", fieldType, null);
+      listViewVector.initializeChildrenFromFields(Collections.singletonList(field));
+
+      // Set values in the child vector.
+      FieldVector fieldVector = listViewVector.getDataVector();
+      fieldVector.clear();
+
+      IntVector childVector = (IntVector) fieldVector;
+
+      childVector.allocateNew(8);
+
+      childVector.set(0, 0);
+      childVector.set(1, 1);
+      childVector.set(2, 2);
+      childVector.set(3, 3);
+      childVector.set(4, 4);
+      childVector.set(5, 5);
+      childVector.set(6, 6);
+      childVector.set(7, 7);
+
+      childVector.setValueCount(8);
+
+      // Set validity, offset and size buffers using `setValidity`,
+      //  `setOffset` and `setSize` methods.
+      listViewVector.setValidity(0, 1);
+      listViewVector.setValidity(1, 1);
+
+      listViewVector.setOffset(0, 1);
+      listViewVector.setOffset(1, 2);
+
+      listViewVector.setSize(0, 3);
+      listViewVector.setSize(1, 1);
+
+      assertEquals(8, listViewVector.getDataVector().getValueCount());
+
+      listViewVector.setValueCount(2);
+      assertEquals(4, listViewVector.getDataVector().getValueCount());
+
+      IntVector childVector1 = (IntVector) listViewVector.getDataVector();
+      final ArrowBuf dataBuffer = childVector1.getDataBuffer();
+      final ArrowBuf validityBuffer = childVector1.getValidityBuffer();
+
+      // yet the underneath buffer contains the original buffer
+      for (int i = 0; i < validityBuffer.capacity(); i++) {
+        assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH));
+      }
+    }
+  }
+
   private void writeIntValues(UnionListViewWriter writer, int[] values) {
     writer.startListView();
     for (int v : values) {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
index 69e16dc470351..afbc30f019ef6 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -60,10 +60,12 @@
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VariableWidthFieldVector;
+import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
 import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
 import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
 import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.LargeListViewVector;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.ListViewVector;
 import org.apache.arrow.vector.complex.StructVector;
@@ -760,4 +762,36 @@ public static void setVector(ListViewVector vector, List<Integer>... values) {
     dataVector.setValueCount(curPos);
     vector.setValueCount(values.length);
   }
+
+  /** Populate values for {@link LargeListViewVector}; a null list is written as a null entry. */
+  public static void setVector(LargeListViewVector vector, List<Integer>... values) {
+    vector.allocateNewSafe();
+    Types.MinorType type = Types.MinorType.INT;
+    vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+    IntVector dataVector = (IntVector) vector.getDataVector();
+    dataVector.allocateNew();
+
+    // set underlying vectors
+    int curPos = 0;
+    for (int i = 0; i < values.length; i++) {
+      // a null list has no elements: size 0, so values[i] is never dereferenced when null
+      final int size = values[i] == null ? 0 : values[i].size();
+      vector
+          .getOffsetBuffer()
+          .setInt((long) i * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH, curPos);
+      if (values[i] == null) {
+        BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+      } else {
+        BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+        for (int value : values[i]) {
+          dataVector.setSafe(curPos, value);
+          curPos += 1;
+        }
+      }
+      vector.getSizeBuffer().setInt((long) i * BaseRepeatedValueViewVector.SIZE_WIDTH, size);
+    }
+    dataVector.setValueCount(curPos);
+    vector.setValueCount(values.length);
+  }
 }

From 83d915a3d2ac2acecbb2cb2dc0dd7f5a213dd625 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 26 Aug 2024 12:38:38 +0900
Subject: [PATCH 13/63] MINOR: [Java] Bump dep.slf4j.version from 2.0.13 to
 2.0.16 in /java (#43652)

Bumps `dep.slf4j.version` from 2.0.13 to 2.0.16.
Updates `org.slf4j:slf4j-api` from 2.0.13 to 2.0.16

Updates `org.slf4j:slf4j-jdk14` from 2.0.13 to 2.0.16

Updates `org.slf4j:jul-to-slf4j` from 2.0.13 to 2.0.16

Updates `org.slf4j:jcl-over-slf4j` from 2.0.13 to 2.0.16

Updates `org.slf4j:log4j-over-slf4j` from 2.0.13 to 2.0.16

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/pom.xml b/java/pom.xml
index a73453df68fd2..54bb7a0ae0eb9 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -94,7 +94,7 @@ under the License.
     <target.gen.source.path>${project.build.directory}/generated-sources</target.gen.source.path>
     <dep.junit.platform.version>1.9.0</dep.junit.platform.version>
     <dep.junit.jupiter.version>5.10.3</dep.junit.jupiter.version>
-    <dep.slf4j.version>2.0.13</dep.slf4j.version>
+    <dep.slf4j.version>2.0.16</dep.slf4j.version>
     <dep.guava-bom.version>33.2.1-jre</dep.guava-bom.version>
     <dep.netty-bom.version>4.1.112.Final</dep.netty-bom.version>
     <dep.grpc-bom.version>1.66.0</dep.grpc-bom.version>

From cbb5f96306972aa236750602aba4b40ceb4219c4 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Sun, 25 Aug 2024 21:33:51 -0700
Subject: [PATCH 14/63] MINOR: [R] Add missing PR num to news.md item (#43811)

### Rationale for this change

We normally link to somewhere to give the user more context on news items. I noticed the link was missing for this one.

### What changes are included in this PR?

Added PR number to news item.

### Are these changes tested?

No.

### Are there any user-facing changes?

No.

Authored-by: Bryce Mecum <petridish@gmail.com>
Signed-off-by: Jacob Wujciak-Jens <jacob@wujciak.de>
---
 r/NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 0e6e4634a0af8..b9568afe66542 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -32,7 +32,7 @@
   functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223)
 * `mutate()` expressions can now include aggregations, such as `x - mean(x)`. (#41350)
 * `summarize()` supports more complex expressions, and correctly handles cases
-  where column names are reused in expressions.
+  where column names are reused in expressions. (#41223)
 * The `na_matches` argument to the `dplyr::*_join()` functions is now supported.
   This argument controls whether `NA` values are considered equal when joining. (#41358)
 * R metadata, stored in the Arrow schema to support round-tripping data between

From 51e9f70f94cd09a0a08196afdd2f4fc644666b5e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 26 Aug 2024 16:20:20 +0900
Subject: [PATCH 15/63] MINOR: [Java] Bump dep.junit.jupiter.version from
 5.10.3 to 5.11.0 in /java (#43751)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps `dep.junit.jupiter.version` from 5.10.3 to 5.11.0.
Updates `org.junit.jupiter:junit-jupiter-engine` from 5.10.3 to 5.11.0
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/junit-team/junit5/releases">org.junit.jupiter:junit-jupiter-engine's releases</a>.</em></p>
<blockquote>
<p>JUnit 5.11.0 = Platform 1.11.0 + Jupiter 5.11.0 + Vintage 5.11.0</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/pshevche"><code>@​pshevche</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3427">junit-team/junit5#3427</a></li>
<li><a href="https://github.com/rybak"><code>@​rybak</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3416">junit-team/junit5#3416</a></li>
<li><a href="https://github.com/pixeebot"><code>@​pixeebot</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3491">junit-team/junit5#3491</a></li>
<li><a href="https://github.com/shartte"><code>@​shartte</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3562">junit-team/junit5#3562</a></li>
<li><a href="https://github.com/eliasnogueira"><code>@​eliasnogueira</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3674">junit-team/junit5#3674</a></li>
<li><a href="https://github.com/bigdaz"><code>@​bigdaz</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3668">junit-team/junit5#3668</a></li>
<li><a href="https://github.com/gilday"><code>@​gilday</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3667">junit-team/junit5#3667</a></li>
<li><a href="https://github.com/bjmi"><code>@​bjmi</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3806">junit-team/junit5#3806</a></li>
<li><a href="https://github.com/madalingiurca"><code>@​madalingiurca</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3787">junit-team/junit5#3787</a></li>
<li><a href="https://github.com/dmlloyd"><code>@​dmlloyd</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3820">junit-team/junit5#3820</a></li>
<li><a href="https://github.com/compf"><code>@​compf</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3867">junit-team/junit5#3867</a></li>
<li><a href="https://github.com/SveinKare"><code>@​SveinKare</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3840">junit-team/junit5#3840</a></li>
<li><a href="https://github.com/mobounya"><code>@​mobounya</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3720">junit-team/junit5#3720</a></li>
<li><a href="https://github.com/robinjhector"><code>@​robinjhector</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3345">junit-team/junit5#3345</a></li>
<li><a href="https://github.com/jabhatfield"><code>@​jabhatfield</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3829">junit-team/junit5#3829</a></li>
<li><a href="https://github.com/rfscholte"><code>@​rfscholte</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3909">junit-team/junit5#3909</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0">https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0</a></p>
<p>JUnit 5.11.0-RC1 = Platform 1.11.0-RC1 + Jupiter 5.11.0-RC1 + Vintage 5.11.0-RC1</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0-RC1/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/compf"><code>@​compf</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3867">junit-team/junit5#3867</a></li>
<li><a href="https://github.com/SveinKare"><code>@​SveinKare</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3840">junit-team/junit5#3840</a></li>
<li><a href="https://github.com/mobounya"><code>@​mobounya</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3720">junit-team/junit5#3720</a></li>
<li><a href="https://github.com/robinjhector"><code>@​robinjhector</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3345">junit-team/junit5#3345</a></li>
<li><a href="https://github.com/jabhatfield"><code>@​jabhatfield</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3829">junit-team/junit5#3829</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.11.0-M2...r5.11.0-RC1">https://github.com/junit-team/junit5/compare/r5.11.0-M2...r5.11.0-RC1</a></p>
<p>JUnit 5.11.0-M2 = Platform 1.11.0-M2 + Jupiter 5.11.0-M2 + Vintage 5.11.0-M2</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0-M2/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/bjmi"><code>@​bjmi</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3806">junit-team/junit5#3806</a></li>
<li><a href="https://github.com/madalingiurca"><code>@​madalingiurca</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3787">junit-team/junit5#3787</a></li>
<li><a href="https://github.com/dmlloyd"><code>@​dmlloyd</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3820">junit-team/junit5#3820</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.11.0-M1...r5.11.0-M2">https://github.com/junit-team/junit5/compare/r5.11.0-M1...r5.11.0-M2</a></p>
<p>JUnit 5.11.0-M1 = Platform 1.11.0-M1 + Jupiter 5.11.0-M1 + Vintage 5.11.0-M1</p>

</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/junit-team/junit5/commit/6b8e42b7a7d1606962341a61941c60b045646278"><code>6b8e42b</code></a> Release 5.11</li>
<li><a href="https://github.com/junit-team/junit5/commit/9430ecee6b99d9438c5a0204549ab88fc66ead86"><code>9430ece</code></a> Allow potentially unlimited maxCharsPerColumn in Csv{File}Source (<a href="https://redirect.github.com/junit-team/junit5/issues/3924">#3924</a>)</li>
<li><a href="https://github.com/junit-team/junit5/commit/0b10f86dd2e0a7fd232c1de032d1e2fbe312f615"><code>0b10f86</code></a> Polish release notes</li>
<li><a href="https://github.com/junit-team/junit5/commit/4dbd0f943efd53e49f8896ec1c9f677526c212cb"><code>4dbd0f9</code></a> Let <code>@ TempDir</code> fail fast with <code>File</code> annotated element and non-default file s...</li>
<li><a href="https://github.com/junit-team/junit5/commit/57f1ad4efd75236e531b9bcbad7c955eb1fb3943"><code>57f1ad4</code></a> Fix syntax</li>
<li><a href="https://github.com/junit-team/junit5/commit/d78730ae9f74bc63a136a29f5c5332154731c99b"><code>d78730a</code></a> Prioritize tasks on critical path of task graph</li>
<li><a href="https://github.com/junit-team/junit5/commit/b6719e2e05ea5001f25dc1628917d23d7e3e76dc"><code>b6719e2</code></a> Remove obsolete directory</li>
<li><a href="https://github.com/junit-team/junit5/commit/d8ec757357932e224ea081b1c8b9d993f143e75f"><code>d8ec757</code></a> Apply Spotless formatting to Gradle script plugins</li>
<li><a href="https://github.com/junit-team/junit5/commit/dae525d51c0811f69f3087b38f24fa9053a31d36"><code>dae525d</code></a> Disable caching of some Spotless tasks due to negative avoidance savings</li>
<li><a href="https://github.com/junit-team/junit5/commit/c63d11843506d908584ebde270d1b3b299417d54"><code>c63d118</code></a> Re-enable caching verifyOSGi tasks (issue was fixed in bnd 7.0.0)</li>
<li>Additional commits viewable in <a href="https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0">compare view</a></li>
</ul>
</details>
<br />

Updates `org.junit.jupiter:junit-jupiter-api` from 5.10.3 to 5.11.0
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/junit-team/junit5/releases">org.junit.jupiter:junit-jupiter-api's releases</a>.</em></p>
<blockquote>
<p>JUnit 5.11.0 = Platform 1.11.0 + Jupiter 5.11.0 + Vintage 5.11.0</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/pshevche"><code>@​pshevche</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3427">junit-team/junit5#3427</a></li>
<li><a href="https://github.com/rybak"><code>@​rybak</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3416">junit-team/junit5#3416</a></li>
<li><a href="https://github.com/pixeebot"><code>@​pixeebot</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3491">junit-team/junit5#3491</a></li>
<li><a href="https://github.com/shartte"><code>@​shartte</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3562">junit-team/junit5#3562</a></li>
<li><a href="https://github.com/eliasnogueira"><code>@​eliasnogueira</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3674">junit-team/junit5#3674</a></li>
<li><a href="https://github.com/bigdaz"><code>@​bigdaz</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3668">junit-team/junit5#3668</a></li>
<li><a href="https://github.com/gilday"><code>@​gilday</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3667">junit-team/junit5#3667</a></li>
<li><a href="https://github.com/bjmi"><code>@​bjmi</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3806">junit-team/junit5#3806</a></li>
<li><a href="https://github.com/madalingiurca"><code>@​madalingiurca</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3787">junit-team/junit5#3787</a></li>
<li><a href="https://github.com/dmlloyd"><code>@​dmlloyd</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3820">junit-team/junit5#3820</a></li>
<li><a href="https://github.com/compf"><code>@​compf</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3867">junit-team/junit5#3867</a></li>
<li><a href="https://github.com/SveinKare"><code>@​SveinKare</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3840">junit-team/junit5#3840</a></li>
<li><a href="https://github.com/mobounya"><code>@​mobounya</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3720">junit-team/junit5#3720</a></li>
<li><a href="https://github.com/robinjhector"><code>@​robinjhector</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3345">junit-team/junit5#3345</a></li>
<li><a href="https://github.com/jabhatfield"><code>@​jabhatfield</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3829">junit-team/junit5#3829</a></li>
<li><a href="https://github.com/rfscholte"><code>@​rfscholte</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3909">junit-team/junit5#3909</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0">https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0</a></p>
<p>JUnit 5.11.0-RC1 = Platform 1.11.0-RC1 + Jupiter 5.11.0-RC1 + Vintage 5.11.0-RC1</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0-RC1/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/compf"><code>@​compf</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3867">junit-team/junit5#3867</a></li>
<li><a href="https://github.com/SveinKare"><code>@​SveinKare</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3840">junit-team/junit5#3840</a></li>
<li><a href="https://github.com/mobounya"><code>@​mobounya</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3720">junit-team/junit5#3720</a></li>
<li><a href="https://github.com/robinjhector"><code>@​robinjhector</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3345">junit-team/junit5#3345</a></li>
<li><a href="https://github.com/jabhatfield"><code>@​jabhatfield</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3829">junit-team/junit5#3829</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.11.0-M2...r5.11.0-RC1">https://github.com/junit-team/junit5/compare/r5.11.0-M2...r5.11.0-RC1</a></p>
<p>JUnit 5.11.0-M2 = Platform 1.11.0-M2 + Jupiter 5.11.0-M2 + Vintage 5.11.0-M2</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0-M2/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/bjmi"><code>@​bjmi</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3806">junit-team/junit5#3806</a></li>
<li><a href="https://github.com/madalingiurca"><code>@​madalingiurca</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3787">junit-team/junit5#3787</a></li>
<li><a href="https://github.com/dmlloyd"><code>@​dmlloyd</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3820">junit-team/junit5#3820</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.11.0-M1...r5.11.0-M2">https://github.com/junit-team/junit5/compare/r5.11.0-M1...r5.11.0-M2</a></p>
<p>JUnit 5.11.0-M1 = Platform 1.11.0-M1 + Jupiter 5.11.0-M1 + Vintage 5.11.0-M1</p>

</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/junit-team/junit5/commit/6b8e42b7a7d1606962341a61941c60b045646278"><code>6b8e42b</code></a> Release 5.11</li>
<li><a href="https://github.com/junit-team/junit5/commit/9430ecee6b99d9438c5a0204549ab88fc66ead86"><code>9430ece</code></a> Allow potentially unlimited maxCharsPerColumn in Csv{File}Source (<a href="https://redirect.github.com/junit-team/junit5/issues/3924">#3924</a>)</li>
<li><a href="https://github.com/junit-team/junit5/commit/0b10f86dd2e0a7fd232c1de032d1e2fbe312f615"><code>0b10f86</code></a> Polish release notes</li>
<li><a href="https://github.com/junit-team/junit5/commit/4dbd0f943efd53e49f8896ec1c9f677526c212cb"><code>4dbd0f9</code></a> Let <code>@ TempDir</code> fail fast with <code>File</code> annotated element and non-default file s...</li>
<li><a href="https://github.com/junit-team/junit5/commit/57f1ad4efd75236e531b9bcbad7c955eb1fb3943"><code>57f1ad4</code></a> Fix syntax</li>
<li><a href="https://github.com/junit-team/junit5/commit/d78730ae9f74bc63a136a29f5c5332154731c99b"><code>d78730a</code></a> Prioritize tasks on critical path of task graph</li>
<li><a href="https://github.com/junit-team/junit5/commit/b6719e2e05ea5001f25dc1628917d23d7e3e76dc"><code>b6719e2</code></a> Remove obsolete directory</li>
<li><a href="https://github.com/junit-team/junit5/commit/d8ec757357932e224ea081b1c8b9d993f143e75f"><code>d8ec757</code></a> Apply Spotless formatting to Gradle script plugins</li>
<li><a href="https://github.com/junit-team/junit5/commit/dae525d51c0811f69f3087b38f24fa9053a31d36"><code>dae525d</code></a> Disable caching of some Spotless tasks due to negative avoidance savings</li>
<li><a href="https://github.com/junit-team/junit5/commit/c63d11843506d908584ebde270d1b3b299417d54"><code>c63d118</code></a> Re-enable caching verifyOSGi tasks (issue was fixed in bnd 7.0.0)</li>
<li>Additional commits viewable in <a href="https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0">compare view</a></li>
</ul>
</details>
<br />

Updates `org.junit.jupiter:junit-jupiter-params` from 5.10.3 to 5.11.0
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/junit-team/junit5/releases">org.junit.jupiter:junit-jupiter-params's releases</a>.</em></p>
<blockquote>
<p>JUnit 5.11.0 = Platform 1.11.0 + Jupiter 5.11.0 + Vintage 5.11.0</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/pshevche"><code>@​pshevche</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3427">junit-team/junit5#3427</a></li>
<li><a href="https://github.com/rybak"><code>@​rybak</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3416">junit-team/junit5#3416</a></li>
<li><a href="https://github.com/pixeebot"><code>@​pixeebot</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3491">junit-team/junit5#3491</a></li>
<li><a href="https://github.com/shartte"><code>@​shartte</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3562">junit-team/junit5#3562</a></li>
<li><a href="https://github.com/eliasnogueira"><code>@​eliasnogueira</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3674">junit-team/junit5#3674</a></li>
<li><a href="https://github.com/bigdaz"><code>@​bigdaz</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3668">junit-team/junit5#3668</a></li>
<li><a href="https://github.com/gilday"><code>@​gilday</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3667">junit-team/junit5#3667</a></li>
<li><a href="https://github.com/bjmi"><code>@​bjmi</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3806">junit-team/junit5#3806</a></li>
<li><a href="https://github.com/madalingiurca"><code>@​madalingiurca</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3787">junit-team/junit5#3787</a></li>
<li><a href="https://github.com/dmlloyd"><code>@​dmlloyd</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3820">junit-team/junit5#3820</a></li>
<li><a href="https://github.com/compf"><code>@​compf</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3867">junit-team/junit5#3867</a></li>
<li><a href="https://github.com/SveinKare"><code>@​SveinKare</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3840">junit-team/junit5#3840</a></li>
<li><a href="https://github.com/mobounya"><code>@​mobounya</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3720">junit-team/junit5#3720</a></li>
<li><a href="https://github.com/robinjhector"><code>@​robinjhector</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3345">junit-team/junit5#3345</a></li>
<li><a href="https://github.com/jabhatfield"><code>@​jabhatfield</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3829">junit-team/junit5#3829</a></li>
<li><a href="https://github.com/rfscholte"><code>@​rfscholte</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3909">junit-team/junit5#3909</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0">https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0</a></p>
<p>JUnit 5.11.0-RC1 = Platform 1.11.0-RC1 + Jupiter 5.11.0-RC1 + Vintage 5.11.0-RC1</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0-RC1/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/compf"><code>@​compf</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3867">junit-team/junit5#3867</a></li>
<li><a href="https://github.com/SveinKare"><code>@​SveinKare</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3840">junit-team/junit5#3840</a></li>
<li><a href="https://github.com/mobounya"><code>@​mobounya</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3720">junit-team/junit5#3720</a></li>
<li><a href="https://github.com/robinjhector"><code>@​robinjhector</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3345">junit-team/junit5#3345</a></li>
<li><a href="https://github.com/jabhatfield"><code>@​jabhatfield</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3829">junit-team/junit5#3829</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.11.0-M2...r5.11.0-RC1">https://github.com/junit-team/junit5/compare/r5.11.0-M2...r5.11.0-RC1</a></p>
<p>JUnit 5.11.0-M2 = Platform 1.11.0-M2 + Jupiter 5.11.0-M2 + Vintage 5.11.0-M2</p>
<p>See <a href="http://junit.org/junit5/docs/5.11.0-M2/release-notes/">Release Notes</a>.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/bjmi"><code>@​bjmi</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3806">junit-team/junit5#3806</a></li>
<li><a href="https://github.com/madalingiurca"><code>@​madalingiurca</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3787">junit-team/junit5#3787</a></li>
<li><a href="https://github.com/dmlloyd"><code>@​dmlloyd</code></a> made their first contribution in <a href="https://redirect.github.com/junit-team/junit5/pull/3820">junit-team/junit5#3820</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/junit-team/junit5/compare/r5.11.0-M1...r5.11.0-M2">https://github.com/junit-team/junit5/compare/r5.11.0-M1...r5.11.0-M2</a></p>
<p>JUnit 5.11.0-M1 = Platform 1.11.0-M1 + Jupiter 5.11.0-M1 + Vintage 5.11.0-M1</p>

</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/junit-team/junit5/commit/6b8e42b7a7d1606962341a61941c60b045646278"><code>6b8e42b</code></a> Release 5.11</li>
<li><a href="https://github.com/junit-team/junit5/commit/9430ecee6b99d9438c5a0204549ab88fc66ead86"><code>9430ece</code></a> Allow potentially unlimited maxCharsPerColumn in Csv{File}Source (<a href="https://redirect.github.com/junit-team/junit5/issues/3924">#3924</a>)</li>
<li><a href="https://github.com/junit-team/junit5/commit/0b10f86dd2e0a7fd232c1de032d1e2fbe312f615"><code>0b10f86</code></a> Polish release notes</li>
<li><a href="https://github.com/junit-team/junit5/commit/4dbd0f943efd53e49f8896ec1c9f677526c212cb"><code>4dbd0f9</code></a> Let <code>@ TempDir</code> fail fast with <code>File</code> annotated element and non-default file s...</li>
<li><a href="https://github.com/junit-team/junit5/commit/57f1ad4efd75236e531b9bcbad7c955eb1fb3943"><code>57f1ad4</code></a> Fix syntax</li>
<li><a href="https://github.com/junit-team/junit5/commit/d78730ae9f74bc63a136a29f5c5332154731c99b"><code>d78730a</code></a> Prioritize tasks on critical path of task graph</li>
<li><a href="https://github.com/junit-team/junit5/commit/b6719e2e05ea5001f25dc1628917d23d7e3e76dc"><code>b6719e2</code></a> Remove obsolete directory</li>
<li><a href="https://github.com/junit-team/junit5/commit/d8ec757357932e224ea081b1c8b9d993f143e75f"><code>d8ec757</code></a> Apply Spotless formatting to Gradle script plugins</li>
<li><a href="https://github.com/junit-team/junit5/commit/dae525d51c0811f69f3087b38f24fa9053a31d36"><code>dae525d</code></a> Disable caching of some Spotless tasks due to negative avoidance savings</li>
<li><a href="https://github.com/junit-team/junit5/commit/c63d11843506d908584ebde270d1b3b299417d54"><code>c63d118</code></a> Re-enable caching verifyOSGi tasks (issue was fixed in bnd 7.0.0)</li>
<li>Additional commits viewable in <a href="https://github.com/junit-team/junit5/compare/r5.10.3...r5.11.0">compare view</a></li>
</ul>
</details>
<br />

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/pom.xml b/java/pom.xml
index 54bb7a0ae0eb9..77feed12f3f1d 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -93,7 +93,7 @@ under the License.
   <properties>
     <target.gen.source.path>${project.build.directory}/generated-sources</target.gen.source.path>
     <dep.junit.platform.version>1.9.0</dep.junit.platform.version>
-    <dep.junit.jupiter.version>5.10.3</dep.junit.jupiter.version>
+    <dep.junit.jupiter.version>5.11.0</dep.junit.jupiter.version>
     <dep.slf4j.version>2.0.16</dep.slf4j.version>
     <dep.guava-bom.version>33.2.1-jre</dep.guava-bom.version>
     <dep.netty-bom.version>4.1.112.Final</dep.netty-bom.version>

From 2328b6ee39b497d9f48e6d342db9f7d0c34d9791 Mon Sep 17 00:00:00 2001
From: Rok Mihevc <rok@mihevc.org>
Date: Mon, 26 Aug 2024 16:34:18 +0200
Subject: [PATCH 16/63] GH-15058: [C++][Python] Native support for UUID
 (#37298)

### Rationale for this change

See #15058.
The UUID datatype is common throughout the ecosystem, and supporting it as a native type in Arrow would reduce friction.

### What changes are included in this PR?

This PR implements the logic for the UUID canonical extension type in C++ and adds a Python wrapper.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes, a new extension type is added.
* Closes: #15058

Authored-by: Rok Mihevc <rok@mihevc.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/CMakeLists.txt                  |   3 +-
 cpp/src/arrow/acero/hash_join_node_test.cc    |   1 +
 cpp/src/arrow/extension/CMakeLists.txt        |   2 +-
 .../extension/fixed_shape_tensor_test.cc      |  17 +--
 cpp/src/arrow/extension/uuid.cc               |  58 ++++++++++
 cpp/src/arrow/extension/uuid.h                |  61 ++++++++++
 cpp/src/arrow/extension/uuid_test.cc          |  72 ++++++++++++
 cpp/src/arrow/extension_type.cc               |   4 +-
 cpp/src/arrow/extension_type_test.cc          |  19 +---
 .../integration/json_integration_test.cc      |   2 +-
 cpp/src/arrow/ipc/test_common.cc              |  35 ++++--
 cpp/src/arrow/ipc/test_common.h               |   3 +
 cpp/src/arrow/scalar_test.cc                  |   5 +-
 cpp/src/arrow/testing/extension_type.h        |   6 +-
 cpp/src/arrow/testing/gtest_util.cc           |  16 ++-
 dev/archery/archery/integration/datagen.py    |   2 +-
 docs/source/format/CanonicalExtensions.rst    |   2 +
 docs/source/status.rst                        |   2 +-
 python/pyarrow/__init__.py                    |  18 +--
 python/pyarrow/array.pxi                      |   6 +
 python/pyarrow/includes/libarrow.pxd          |  10 ++
 python/pyarrow/lib.pxd                        |   3 +
 python/pyarrow/public-api.pxi                 |  11 +-
 python/pyarrow/scalar.pxi                     |  10 ++
 python/pyarrow/src/arrow/python/gdb.cc        |  27 +----
 python/pyarrow/tests/extensions.pyx           |   2 +-
 python/pyarrow/tests/test_extension_type.py   | 105 ++++++++++++------
 python/pyarrow/tests/test_gdb.py              |   8 +-
 python/pyarrow/types.pxi                      |  34 ++++++
 29 files changed, 412 insertions(+), 132 deletions(-)
 create mode 100644 cpp/src/arrow/extension/uuid.cc
 create mode 100644 cpp/src/arrow/extension/uuid.h
 create mode 100644 cpp/src/arrow/extension/uuid_test.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 89f28ee416ede..6b0ac8c23c75a 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -375,6 +375,7 @@ set(ARROW_SRCS
     device.cc
     extension_type.cc
     extension/bool8.cc
+    extension/uuid.cc
     pretty_print.cc
     record_batch.cc
     result.cc
@@ -1225,6 +1226,7 @@ add_subdirectory(testing)
 add_subdirectory(array)
 add_subdirectory(c)
 add_subdirectory(compute)
+add_subdirectory(extension)
 add_subdirectory(io)
 add_subdirectory(tensor)
 add_subdirectory(util)
@@ -1267,7 +1269,6 @@ endif()
 
 if(ARROW_JSON)
   add_subdirectory(json)
-  add_subdirectory(extension)
 endif()
 
 if(ARROW_ORC)
diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc
index 9065e286a2228..76ad9c7d650eb 100644
--- a/cpp/src/arrow/acero/hash_join_node_test.cc
+++ b/cpp/src/arrow/acero/hash_join_node_test.cc
@@ -29,6 +29,7 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/row/row_encoder_internal.h"
+#include "arrow/extension/uuid.h"
 #include "arrow/testing/extension_type.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
diff --git a/cpp/src/arrow/extension/CMakeLists.txt b/cpp/src/arrow/extension/CMakeLists.txt
index 5cb4bc77af2a4..065ea3f1ddb16 100644
--- a/cpp/src/arrow/extension/CMakeLists.txt
+++ b/cpp/src/arrow/extension/CMakeLists.txt
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set(CANONICAL_EXTENSION_TESTS bool8_test.cc)
+set(CANONICAL_EXTENSION_TESTS bool8_test.cc uuid_test.cc)
 
 if(ARROW_JSON)
   list(APPEND CANONICAL_EXTENSION_TESTS fixed_shape_tensor_test.cc opaque_test.cc)
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc
index 3fd39a11ff50d..842a78e1a4f7a 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc
+++ b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc
@@ -23,7 +23,7 @@
 #include "arrow/array/array_primitive.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/reader.h"
-#include "arrow/ipc/writer.h"
+#include "arrow/ipc/test_common.h"
 #include "arrow/record_batch.h"
 #include "arrow/tensor.h"
 #include "arrow/testing/gtest_util.h"
@@ -33,6 +33,7 @@
 namespace arrow {
 
 using FixedShapeTensorType = extension::FixedShapeTensorType;
+using arrow::ipc::test::RoundtripBatch;
 using extension::fixed_shape_tensor;
 using extension::FixedShapeTensorArray;
 
@@ -71,20 +72,6 @@ class TestExtensionType : public ::testing::Test {
   std::string serialized_;
 };
 
-auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
-                         std::shared_ptr<RecordBatch>* out) {
-  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
-  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, ipc::IpcWriteOptions::Defaults(),
-                                        out_stream.get()));
-
-  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
-
-  io::BufferReader reader(complete_ipc_stream);
-  std::shared_ptr<RecordBatchReader> batch_reader;
-  ASSERT_OK_AND_ASSIGN(batch_reader, ipc::RecordBatchStreamReader::Open(&reader));
-  ASSERT_OK(batch_reader->ReadNext(out));
-};
-
 TEST_F(TestExtensionType, CheckDummyRegistration) {
   // We need a registered dummy type at runtime to allow for IPC deserialization
   auto registered_type = GetExtensionType("arrow.fixed_shape_tensor");
diff --git a/cpp/src/arrow/extension/uuid.cc b/cpp/src/arrow/extension/uuid.cc
new file mode 100644
index 0000000000000..43b917a17f8b2
--- /dev/null
+++ b/cpp/src/arrow/extension/uuid.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <sstream>
+
+#include "arrow/extension_type.h"
+#include "arrow/util/logging.h"
+
+#include "arrow/extension/uuid.h"
+
+namespace arrow::extension {
+
+bool UuidType::ExtensionEquals(const ExtensionType& other) const {
+  // Equality is decided by the extension name alone.
+  return this->extension_name() == other.extension_name();
+}
+
+std::shared_ptr<Array> UuidType::MakeArray(std::shared_ptr<ArrayData> data) const {
+  DCHECK_EQ(data->type->id(), Type::EXTENSION);
+  DCHECK_EQ("arrow.uuid",
+            static_cast<const ExtensionType&>(*data->type).extension_name());
+  return std::make_shared<UuidArray>(data);
+}
+
+Result<std::shared_ptr<DataType>> UuidType::Deserialize(
+    std::shared_ptr<DataType> storage_type, const std::string& serialized) const {
+  // Only empty metadata over fixed_size_binary(16) storage is accepted.
+  if (!serialized.empty()) {
+    return Status::Invalid("Unexpected serialized metadata: '", serialized, "'");
+  }
+  if (storage_type->Equals(*fixed_size_binary(16))) {
+    return std::make_shared<UuidType>();
+  }
+  return Status::Invalid("Invalid storage type for UuidType: ",
+                         storage_type->ToString());
+}
+
+std::string UuidType::ToString(bool show_metadata) const {
+  return "extension<" + this->extension_name() + ">";  // show_metadata is unused
+}
+
+std::shared_ptr<DataType> uuid() { return std::make_shared<UuidType>(); }
+
+}  // namespace arrow::extension
diff --git a/cpp/src/arrow/extension/uuid.h b/cpp/src/arrow/extension/uuid.h
new file mode 100644
index 0000000000000..42bb21cf0b2ed
--- /dev/null
+++ b/cpp/src/arrow/extension/uuid.h
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/extension_type.h"
+
+namespace arrow::extension {
+
+/// \brief UuidArray stores an array of UUIDs. The underlying storage type is
+/// FixedSizeBinary(16).
+class ARROW_EXPORT UuidArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;
+};
+
+/// \brief UuidType is a canonical Arrow extension type for UUIDs.
+/// UUIDs are stored as FixedSizeBinary(16) with big-endian notation, and this
+/// type does not interpret the bytes in any way. A specific UUID version is not
+/// required or guaranteed.
+class ARROW_EXPORT UuidType : public ExtensionType {
+ public:
+  /// \brief Construct a UuidType.
+  UuidType() : ExtensionType(fixed_size_binary(16)) {}
+
+  std::string extension_name() const override { return "arrow.uuid"; }
+  std::string ToString(bool show_metadata = false) const override;
+
+  bool ExtensionEquals(const ExtensionType& other) const override;
+
+  /// \brief Create a UuidArray from ArrayData.
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
+
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized) const override;
+
+  std::string Serialize() const override { return ""; }  // no parameters to encode
+
+  /// \brief Create a UuidType instance; the returned Result always holds a value.
+  static Result<std::shared_ptr<DataType>> Make() { return std::make_shared<UuidType>(); }
+};
+
+/// \brief Return a UuidType instance.
+ARROW_EXPORT std::shared_ptr<DataType> uuid();
+
+}  // namespace arrow::extension
diff --git a/cpp/src/arrow/extension/uuid_test.cc b/cpp/src/arrow/extension/uuid_test.cc
new file mode 100644
index 0000000000000..3bbb6eeb4aef1
--- /dev/null
+++ b/cpp/src/arrow/extension/uuid_test.cc
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension/uuid.h"
+
+#include "arrow/testing/matchers.h"
+
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/test_common.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
+
+#include "arrow/testing/extension_type.h"
+
+namespace arrow {
+
+using arrow::ipc::test::RoundtripBatch;
+
+TEST(TestUuidExtensionType, ExtensionTypeTest) {
+  // Qualified on purpose: bare uuid() here is arrow::uuid(), the test-only example type.
+  auto type = extension::uuid();
+  ASSERT_EQ(type->id(), Type::EXTENSION);
+  const auto& ext_type = static_cast<const ExtensionType&>(*type);
+  ASSERT_EQ(ext_type.extension_name(), "arrow.uuid");
+  std::string serialized = ext_type.Serialize();
+  ASSERT_OK_AND_ASSIGN(auto deserialized,
+                       ext_type.Deserialize(fixed_size_binary(16), serialized));
+  ASSERT_TRUE(deserialized->Equals(*type));
+  ASSERT_FALSE(deserialized->Equals(*fixed_size_binary(16)));
+}
+
+TEST(TestUuidExtensionType, RoundtripBatch) {
+  auto ext_type = extension::uuid();
+  auto exact_ext_type = internal::checked_pointer_cast<extension::UuidType>(ext_type);
+  auto arr = ArrayFromJSON(fixed_size_binary(16), R"(["abcdefghijklmnop", null])");
+  auto ext_arr = ExtensionType::WrapArray(ext_type, arr);
+
+  // Send an extension array; expect to get an extension array back
+  std::shared_ptr<RecordBatch> read_batch;
+  auto ext_field = field(/*name=*/"f0", /*type=*/ext_type);
+  auto batch = RecordBatch::Make(schema({ext_field}), ext_arr->length(), {ext_arr});
+  RoundtripBatch(batch, &read_batch);
+  CompareBatch(*batch, *read_batch, /*compare_metadata=*/true);
+
+  // Send the storage array plus extension metadata; expect an extension array back
+  std::shared_ptr<RecordBatch> read_batch2;
+  auto ext_metadata =
+      key_value_metadata({{"ARROW:extension:name", exact_ext_type->extension_name()},
+                          {"ARROW:extension:metadata", ""}});
+  ext_field = field(/*name=*/"f0", /*type=*/exact_ext_type->storage_type(),
+                    /*nullable=*/true, /*metadata=*/ext_metadata);
+  auto batch2 = RecordBatch::Make(schema({ext_field}), arr->length(), {arr});
+  RoundtripBatch(batch2, &read_batch2);
+  CompareBatch(*batch, *read_batch2, /*compare_metadata=*/true);
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc
index 83c7ebed4f319..fc220f73a6beb 100644
--- a/cpp/src/arrow/extension_type.cc
+++ b/cpp/src/arrow/extension_type.cc
@@ -32,6 +32,7 @@
 #include "arrow/extension/fixed_shape_tensor.h"
 #include "arrow/extension/opaque.h"
 #endif
+#include "arrow/extension/uuid.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
@@ -147,14 +148,13 @@ static void CreateGlobalRegistry() {
   // Register canonical extension types
 
   g_registry = std::make_shared<ExtensionTypeRegistryImpl>();
-  std::vector<std::shared_ptr<DataType>> ext_types{extension::bool8()};
+  std::vector<std::shared_ptr<DataType>> ext_types{extension::bool8(), extension::uuid()};
 
 #ifdef ARROW_JSON
   ext_types.push_back(extension::fixed_shape_tensor(int64(), {}));
   ext_types.push_back(extension::opaque(null(), "", ""));
 #endif
 
-  // Register canonical extension types
   for (const auto& ext_type : ext_types) {
     ARROW_CHECK_OK(
         g_registry->RegisterType(checked_pointer_cast<ExtensionType>(ext_type)));
diff --git a/cpp/src/arrow/extension_type_test.cc b/cpp/src/arrow/extension_type_test.cc
index f104c984a64b4..f49ffc5cba553 100644
--- a/cpp/src/arrow/extension_type_test.cc
+++ b/cpp/src/arrow/extension_type_test.cc
@@ -30,6 +30,7 @@
 #include "arrow/io/memory.h"
 #include "arrow/ipc/options.h"
 #include "arrow/ipc/reader.h"
+#include "arrow/ipc/test_common.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
@@ -41,6 +42,8 @@
 
 namespace arrow {
 
+using arrow::ipc::test::RoundtripBatch;
+
 class Parametric1Array : public ExtensionArray {
  public:
   using ExtensionArray::ExtensionArray;
@@ -178,7 +181,7 @@ class ExtStructType : public ExtensionType {
 
 class TestExtensionType : public ::testing::Test {
  public:
-  void SetUp() { ASSERT_OK(RegisterExtensionType(std::make_shared<UuidType>())); }
+  void SetUp() { ASSERT_OK(RegisterExtensionType(std::make_shared<ExampleUuidType>())); }
 
   void TearDown() {
     if (GetExtensionType("uuid")) {
@@ -211,20 +214,6 @@ TEST_F(TestExtensionType, ExtensionTypeTest) {
   ASSERT_EQ(deserialized->byte_width(), 16);
 }
 
-auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
-                         std::shared_ptr<RecordBatch>* out) {
-  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
-  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, ipc::IpcWriteOptions::Defaults(),
-                                        out_stream.get()));
-
-  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
-
-  io::BufferReader reader(complete_ipc_stream);
-  std::shared_ptr<RecordBatchReader> batch_reader;
-  ASSERT_OK_AND_ASSIGN(batch_reader, ipc::RecordBatchStreamReader::Open(&reader));
-  ASSERT_OK(batch_reader->ReadNext(out));
-};
-
 TEST_F(TestExtensionType, IpcRoundtrip) {
   auto ext_arr = ExampleUuid();
   auto batch = RecordBatch::Make(schema({field("f0", uuid())}), 4, {ext_arr});
diff --git a/cpp/src/arrow/integration/json_integration_test.cc b/cpp/src/arrow/integration/json_integration_test.cc
index 9b56928c68843..0e84ea6124d5d 100644
--- a/cpp/src/arrow/integration/json_integration_test.cc
+++ b/cpp/src/arrow/integration/json_integration_test.cc
@@ -1046,7 +1046,7 @@ TEST(TestJsonFileReadWrite, JsonExample2) {
 
     auto storage_array =
         ArrayFromJSON(fixed_size_binary(16), R"(["0123456789abcdef", null])");
-    AssertArraysEqual(*batch->column(0), UuidArray(uuid_type, storage_array));
+    AssertArraysEqual(*batch->column(0), ExampleUuidArray(uuid_type, storage_array));
 
     AssertArraysEqual(*batch->column(1), NullArray(2));
   }
diff --git a/cpp/src/arrow/ipc/test_common.cc b/cpp/src/arrow/ipc/test_common.cc
index 87c02e2d87a1e..fb4f6bd8eadcf 100644
--- a/cpp/src/arrow/ipc/test_common.cc
+++ b/cpp/src/arrow/ipc/test_common.cc
@@ -27,8 +27,10 @@
 #include "arrow/array.h"
 #include "arrow/array/builder_binary.h"
 #include "arrow/array/builder_primitive.h"
-#include "arrow/array/builder_time.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
 #include "arrow/ipc/test_common.h"
+#include "arrow/ipc/writer.h"
 #include "arrow/pretty_print.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
@@ -242,11 +244,11 @@ Status MakeRandomBooleanArray(const int length, bool include_nulls,
                               std::shared_ptr<Array>* out) {
   std::vector<uint8_t> values(length);
   random_null_bytes(length, 0.5, values.data());
-  ARROW_ASSIGN_OR_RAISE(auto data, internal::BytesToBits(values));
+  ARROW_ASSIGN_OR_RAISE(auto data, arrow::internal::BytesToBits(values));
 
   if (include_nulls) {
     std::vector<uint8_t> valid_bytes(length);
-    ARROW_ASSIGN_OR_RAISE(auto null_bitmap, internal::BytesToBits(valid_bytes));
+    ARROW_ASSIGN_OR_RAISE(auto null_bitmap, arrow::internal::BytesToBits(valid_bytes));
     random_null_bytes(length, 0.1, valid_bytes.data());
     *out = std::make_shared<BooleanArray>(length, data, null_bitmap, -1);
   } else {
@@ -596,7 +598,7 @@ Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
   std::shared_ptr<Array> no_nulls(new StructArray(type, list_batch->num_rows(), columns));
   std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1);
   null_bytes[0] = 0;
-  ARROW_ASSIGN_OR_RAISE(auto null_bitmap, internal::BytesToBits(null_bytes));
+  ARROW_ASSIGN_OR_RAISE(auto null_bitmap, arrow::internal::BytesToBits(null_bytes));
   std::shared_ptr<Array> with_nulls(
       new StructArray(type, list_batch->num_rows(), columns, null_bitmap, 1));
 
@@ -1088,9 +1090,9 @@ Status MakeUuid(std::shared_ptr<RecordBatch>* out) {
   auto f1 = field("f1", uuid_type, /*nullable=*/false);
   auto schema = ::arrow::schema({f0, f1});
 
-  auto a0 = std::make_shared<UuidArray>(
+  auto a0 = std::make_shared<ExampleUuidArray>(
       uuid_type, ArrayFromJSON(storage_type, R"(["0123456789abcdef", null])"));
-  auto a1 = std::make_shared<UuidArray>(
+  auto a1 = std::make_shared<ExampleUuidArray>(
       uuid_type,
       ArrayFromJSON(storage_type, R"(["ZYXWVUTSRQPONMLK", "JIHGFEDBA9876543"])"));
 
@@ -1176,12 +1178,13 @@ enable_if_t<std::is_floating_point<CValueType>::value, void> FillRandomData(
 Status MakeRandomTensor(const std::shared_ptr<DataType>& type,
                         const std::vector<int64_t>& shape, bool row_major_p,
                         std::shared_ptr<Tensor>* out, uint32_t seed) {
-  const auto& element_type = internal::checked_cast<const FixedWidthType&>(*type);
+  const auto& element_type = arrow::internal::checked_cast<const FixedWidthType&>(*type);
   std::vector<int64_t> strides;
   if (row_major_p) {
-    RETURN_NOT_OK(internal::ComputeRowMajorStrides(element_type, shape, &strides));
+    RETURN_NOT_OK(arrow::internal::ComputeRowMajorStrides(element_type, shape, &strides));
   } else {
-    RETURN_NOT_OK(internal::ComputeColumnMajorStrides(element_type, shape, &strides));
+    RETURN_NOT_OK(
+        arrow::internal::ComputeColumnMajorStrides(element_type, shape, &strides));
   }
 
   const int64_t element_size = element_type.bit_width() / CHAR_BIT;
@@ -1233,6 +1236,20 @@ Status MakeRandomTensor(const std::shared_ptr<DataType>& type,
   return Tensor::Make(type, buf, shape, strides).Value(out);
 }
 
+void RoundtripBatch(const std::shared_ptr<RecordBatch>& batch,
+                    std::shared_ptr<RecordBatch>* out) {
+  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
+  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, ipc::IpcWriteOptions::Defaults(),
+                                        out_stream.get()));
+  // `batch` is now serialized as an IPC stream in memory; grab the finished buffer.
+  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
+  // Read the stream back; a single ReadNext() call stores its result in `out`.
+  io::BufferReader reader(complete_ipc_stream);
+  std::shared_ptr<RecordBatchReader> batch_reader;
+  ASSERT_OK_AND_ASSIGN(batch_reader, ipc::RecordBatchStreamReader::Open(&reader));
+  ASSERT_OK(batch_reader->ReadNext(out));
+}
+
 }  // namespace test
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/ipc/test_common.h b/cpp/src/arrow/ipc/test_common.h
index db8613cbb1e6a..9b7e7f13e3a8e 100644
--- a/cpp/src/arrow/ipc/test_common.h
+++ b/cpp/src/arrow/ipc/test_common.h
@@ -184,6 +184,9 @@ Status MakeRandomTensor(const std::shared_ptr<DataType>& type,
                         const std::vector<int64_t>& shape, bool row_major_p,
                         std::shared_ptr<Tensor>* out, uint32_t seed = 0);
 
+ARROW_TESTING_EXPORT void RoundtripBatch(const std::shared_ptr<RecordBatch>& batch,
+                                         std::shared_ptr<RecordBatch>* out);
+
 }  // namespace test
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc
index 104a5697b5727..e9ec13e98b4ee 100644
--- a/cpp/src/arrow/scalar_test.cc
+++ b/cpp/src/arrow/scalar_test.cc
@@ -43,7 +43,6 @@ namespace arrow {
 
 using compute::Cast;
 using compute::CastOptions;
-
 using internal::checked_cast;
 using internal::checked_pointer_cast;
 
@@ -2038,7 +2037,7 @@ class TestExtensionScalar : public ::testing::Test {
   void SetUp() {
     type_ = uuid();
     storage_type_ = fixed_size_binary(16);
-    uuid_type_ = checked_cast<const UuidType*>(type_.get());
+    uuid_type_ = checked_cast<const ExampleUuidType*>(type_.get());
   }
 
  protected:
@@ -2049,7 +2048,7 @@ class TestExtensionScalar : public ::testing::Test {
   }
 
   std::shared_ptr<DataType> type_, storage_type_;
-  const UuidType* uuid_type_{nullptr};
+  const ExampleUuidType* uuid_type_{nullptr};
 
   const std::string_view uuid_string1_{UUID_STRING1};
   const std::string_view uuid_string2_{UUID_STRING2};
diff --git a/cpp/src/arrow/testing/extension_type.h b/cpp/src/arrow/testing/extension_type.h
index 6515631f202ae..a4526e31c2b93 100644
--- a/cpp/src/arrow/testing/extension_type.h
+++ b/cpp/src/arrow/testing/extension_type.h
@@ -27,14 +27,14 @@
 
 namespace arrow {
 
-class ARROW_TESTING_EXPORT UuidArray : public ExtensionArray {
+class ARROW_TESTING_EXPORT ExampleUuidArray : public ExtensionArray {
  public:
   using ExtensionArray::ExtensionArray;
 };
 
-class ARROW_TESTING_EXPORT UuidType : public ExtensionType {
+class ARROW_TESTING_EXPORT ExampleUuidType : public ExtensionType {
  public:
-  UuidType() : ExtensionType(fixed_size_binary(16)) {}
+  ExampleUuidType() : ExtensionType(fixed_size_binary(16)) {}
 
   std::string extension_name() const override { return "uuid"; }
 
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index 95de16c715f19..ae2e53b30a3ee 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -49,9 +49,13 @@
 #include "arrow/buffer.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/datum.h"
+#include "arrow/io/memory.h"
 #include "arrow/ipc/json_simple.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
 #include "arrow/json/rapidjson_defs.h"  // IWYU pragma: keep
 #include "arrow/pretty_print.h"
+#include "arrow/record_batch.h"
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/tensor.h"
@@ -847,17 +851,17 @@ Future<> SleepABitAsync() {
 ///////////////////////////////////////////////////////////////////////////
 // Extension types
 
-bool UuidType::ExtensionEquals(const ExtensionType& other) const {
+bool ExampleUuidType::ExtensionEquals(const ExtensionType& other) const {
   return (other.extension_name() == this->extension_name());
 }
 
-std::shared_ptr<Array> UuidType::MakeArray(std::shared_ptr<ArrayData> data) const {
+std::shared_ptr<Array> ExampleUuidType::MakeArray(std::shared_ptr<ArrayData> data) const {
   DCHECK_EQ(data->type->id(), Type::EXTENSION);
   DCHECK_EQ("uuid", static_cast<const ExtensionType&>(*data->type).extension_name());
-  return std::make_shared<UuidArray>(data);
+  return std::make_shared<ExampleUuidArray>(data);
 }
 
-Result<std::shared_ptr<DataType>> UuidType::Deserialize(
+Result<std::shared_ptr<DataType>> ExampleUuidType::Deserialize(
     std::shared_ptr<DataType> storage_type, const std::string& serialized) const {
   if (serialized != "uuid-serialized") {
     return Status::Invalid("Type identifier did not match: '", serialized, "'");
@@ -866,7 +870,7 @@ Result<std::shared_ptr<DataType>> UuidType::Deserialize(
     return Status::Invalid("Invalid storage type for UuidType: ",
                            storage_type->ToString());
   }
-  return std::make_shared<UuidType>();
+  return std::make_shared<ExampleUuidType>();
 }
 
 bool SmallintType::ExtensionEquals(const ExtensionType& other) const {
@@ -982,7 +986,7 @@ Result<std::shared_ptr<DataType>> Complex128Type::Deserialize(
   return std::make_shared<Complex128Type>();
 }
 
-std::shared_ptr<DataType> uuid() { return std::make_shared<UuidType>(); }
+std::shared_ptr<DataType> uuid() { return std::make_shared<ExampleUuidType>(); }
 
 std::shared_ptr<DataType> smallint() { return std::make_shared<SmallintType>(); }
 
diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index d395d26cb71d3..f63aa0d95a484 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1845,7 +1845,7 @@ def generate_nested_dictionary_case():
 def generate_extension_case():
     dict0 = Dictionary(0, StringField('dictionary0'), size=5, name='DICT0')
 
-    uuid_type = ExtensionType('uuid', 'uuid-serialized',
+    uuid_type = ExtensionType('arrow.uuid', '',
                               FixedSizeBinaryField('', 16))
     dict_ext_type = ExtensionType(
         'dict-extension', 'dict-extension-serialized',
diff --git a/docs/source/format/CanonicalExtensions.rst b/docs/source/format/CanonicalExtensions.rst
index 5658f949ceeaa..1106f8aaffdd3 100644
--- a/docs/source/format/CanonicalExtensions.rst
+++ b/docs/source/format/CanonicalExtensions.rst
@@ -272,6 +272,8 @@ JSON
   In the future, additional fields may be added, but they are not required
   to interpret the array.
 
+.. _uuid_extension:
+
 UUID
 ====
 
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 5e2c2cc19c890..b685d4bbf8add 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -121,7 +121,7 @@ Data Types
 +-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | JSON                  |       |       | ✓     |            |       |       |       |       |
 +-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| UUID                  |       |       | ✓     |            |       |       |       |       |
+| UUID                  | ✓     |       | ✓     |            |       |       |       |       |
 +-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | 8-bit Boolean         | ✓     |       | ✓     |            |       |       |       |       |
 +-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 807bcdc315036..d31c93119b73a 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -172,9 +172,7 @@ def print_entry(label, value):
                          union, sparse_union, dense_union,
                          dictionary,
                          run_end_encoded,
-                         fixed_shape_tensor,
-                         opaque,
-                         bool8,
+                         bool8, fixed_shape_tensor, opaque, uuid,
                          field,
                          type_for_alias,
                          DataType, DictionaryType, StructType,
@@ -184,8 +182,9 @@ def print_entry(label, value):
                          TimestampType, Time32Type, Time64Type, DurationType,
                          FixedSizeBinaryType, Decimal128Type, Decimal256Type,
                          BaseExtensionType, ExtensionType,
-                         RunEndEncodedType, FixedShapeTensorType, OpaqueType,
-                         Bool8Type, PyExtensionType, UnknownExtensionType,
+                         RunEndEncodedType, Bool8Type, FixedShapeTensorType,
+                         OpaqueType, UuidType,
+                         PyExtensionType, UnknownExtensionType,
                          register_extension_type, unregister_extension_type,
                          DictionaryMemo,
                          KeyValueMetadata,
@@ -218,8 +217,9 @@ def print_entry(label, value):
                          Time32Array, Time64Array, DurationArray,
                          MonthDayNanoIntervalArray,
                          Decimal128Array, Decimal256Array, StructArray, ExtensionArray,
-                         RunEndEncodedArray, FixedShapeTensorArray, OpaqueArray,
-                         Bool8Array, scalar, NA, _NULL as NULL, Scalar,
+                         RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
+                         OpaqueArray, UuidArray,
+                         scalar, NA, _NULL as NULL, Scalar,
                          NullScalar, BooleanScalar,
                          Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
                          UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
@@ -235,8 +235,8 @@ def print_entry(label, value):
                          StringScalar, LargeStringScalar, StringViewScalar,
                          FixedSizeBinaryScalar, DictionaryScalar,
                          MapScalar, StructScalar, UnionScalar,
-                         RunEndEncodedScalar, ExtensionScalar,
-                         FixedShapeTensorScalar, OpaqueScalar, Bool8Scalar)
+                         RunEndEncodedScalar, Bool8Scalar, ExtensionScalar,
+                         FixedShapeTensorScalar, OpaqueScalar, UuidScalar)
 
 # Buffers, allocation
 from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 77d6c9c06d2de..1587de0e6b744 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -4338,6 +4338,12 @@ cdef class ExtensionArray(Array):
         return result
 
 
+class UuidArray(ExtensionArray):
+    """
+    Concrete class for Arrow arrays of UUID data type.
+    """
+
+
 cdef class FixedShapeTensorArray(ExtensionArray):
     """
     Concrete class for fixed shape tensor extension arrays.
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 6f510cfc0c06c..c2346750a196f 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2865,6 +2865,16 @@ cdef extern from "arrow/extension_type.h" namespace "arrow":
         shared_ptr[CArray] storage()
 
 
+cdef extern from "arrow/extension/uuid.h" namespace "arrow::extension" nogil:
+    cdef cppclass CUuidType" arrow::extension::UuidType"(CExtensionType):
+
+        @staticmethod
+        CResult[shared_ptr[CDataType]] Make()
+
+    cdef cppclass CUuidArray" arrow::extension::UuidArray"(CExtensionArray):
+        pass
+
+
 cdef extern from "arrow/extension/fixed_shape_tensor.h" namespace "arrow::extension" nogil:
     cdef cppclass CFixedShapeTensorType \
             " arrow::extension::FixedShapeTensorType"(CExtensionType):
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index a7c3b496a0045..5c3d981c3adc7 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -222,6 +222,9 @@ cdef class OpaqueType(BaseExtensionType):
     cdef:
         const COpaqueType* opaque_ext_type
 
+cdef class UuidType(BaseExtensionType):
+    cdef:
+        const CUuidType* uuid_ext_type
 
 cdef class PyExtensionType(ExtensionType):
     pass
diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi
index 19a26bd6c683d..d3e2ff2e99d91 100644
--- a/python/pyarrow/public-api.pxi
+++ b/python/pyarrow/public-api.pxi
@@ -120,14 +120,17 @@ cdef api object pyarrow_wrap_data_type(
     elif type.get().id() == _Type_EXTENSION:
         ext_type = <const CExtensionType*> type.get()
         cpy_ext_type = dynamic_cast[_CPyExtensionTypePtr](ext_type)
+        extension_name = ext_type.extension_name()
         if cpy_ext_type != nullptr:
             return cpy_ext_type.GetInstance()
-        elif ext_type.extension_name() == b"arrow.fixed_shape_tensor":
+        elif extension_name == b"arrow.bool8":
+            out = Bool8Type.__new__(Bool8Type)
+        elif extension_name == b"arrow.fixed_shape_tensor":
             out = FixedShapeTensorType.__new__(FixedShapeTensorType)
-        elif ext_type.extension_name() == b"arrow.opaque":
+        elif extension_name == b"arrow.opaque":
             out = OpaqueType.__new__(OpaqueType)
-        elif ext_type.extension_name() == b"arrow.bool8":
-            out = Bool8Type.__new__(Bool8Type)
+        elif extension_name == b"arrow.uuid":
+            out = UuidType.__new__(UuidType)
         else:
             out = BaseExtensionType.__new__(BaseExtensionType)
     else:
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 72ae2aee5f8b3..68f77832c4342 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -17,6 +17,7 @@
 
 import collections
 from cython cimport binding
+from uuid import UUID
 
 
 cdef class Scalar(_Weakrefable):
@@ -1043,6 +1044,15 @@ cdef class ExtensionScalar(Scalar):
         return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar)
 
 
+class UuidScalar(ExtensionScalar):
+    """
+    Concrete class for Uuid extension scalar.
+    """
+
+    def as_py(self):
+        return None if self.value is None else UUID(bytes=self.value.as_py())
+
+
 cdef class FixedShapeTensorScalar(ExtensionScalar):
     """
     Concrete class for fixed shape tensor extension scalar.
diff --git a/python/pyarrow/src/arrow/python/gdb.cc b/python/pyarrow/src/arrow/python/gdb.cc
index 6941769e4efe8..7c58bae3342c2 100644
--- a/python/pyarrow/src/arrow/python/gdb.cc
+++ b/python/pyarrow/src/arrow/python/gdb.cc
@@ -22,7 +22,7 @@
 #include "arrow/array.h"
 #include "arrow/chunked_array.h"
 #include "arrow/datum.h"
-#include "arrow/extension_type.h"
+#include "arrow/extension/uuid.h"
 #include "arrow/ipc/json_simple.h"
 #include "arrow/python/gdb.h"
 #include "arrow/record_batch.h"
@@ -37,6 +37,8 @@
 
 namespace arrow {
 
+using extension::uuid;
+using extension::UuidType;
 using ipc::internal::json::ArrayFromJSON;
 using ipc::internal::json::ChunkedArrayFromJSON;
 using ipc::internal::json::ScalarFromJSON;
@@ -56,29 +58,6 @@ class CustomStatusDetail : public StatusDetail {
   std::string ToString() const override { return "This is a detail"; }
 };
 
-class UuidType : public ExtensionType {
- public:
-  UuidType() : ExtensionType(fixed_size_binary(16)) {}
-
-  std::string extension_name() const override { return "uuid"; }
-
-  bool ExtensionEquals(const ExtensionType& other) const override {
-    return (other.extension_name() == this->extension_name());
-  }
-
-  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
-    return std::make_shared<ExtensionArray>(data);
-  }
-
-  Result<std::shared_ptr<DataType>> Deserialize(
-      std::shared_ptr<DataType> storage_type,
-      const std::string& serialized) const override {
-    return Status::NotImplemented("");
-  }
-
-  std::string Serialize() const override { return "uuid-serialized"; }
-};
-
 std::shared_ptr<Array> SliceArrayFromJSON(const std::shared_ptr<DataType>& ty,
                                           std::string_view json, int64_t offset = 0,
                                           int64_t length = -1) {
diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx
index c1bf9aae1ec03..309b574dc0264 100644
--- a/python/pyarrow/tests/extensions.pyx
+++ b/python/pyarrow/tests/extensions.pyx
@@ -37,7 +37,7 @@ cdef extern from * namespace "arrow::py" nogil:
     class UuidType : public ExtensionType {
     public:
         UuidType() : ExtensionType(fixed_size_binary(16)) {}
-        std::string extension_name() const override { return "uuid"; }
+        std::string extension_name() const override { return "example-uuid"; }
 
         bool ExtensionEquals(const ExtensionType& other) const override {
             return other.extension_name() == this->extension_name();
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index 0d50c467e96bd..aacbd2cb6e756 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -95,18 +95,21 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
         return cls()
 
 
-class UuidScalarType(pa.ExtensionScalar):
+class ExampleUuidScalarType(pa.ExtensionScalar):
     def as_py(self):
         return None if self.value is None else UUID(bytes=self.value.as_py())
 
 
-class UuidType(pa.ExtensionType):
+class ExampleUuidType(pa.ExtensionType):
 
     def __init__(self):
-        super().__init__(pa.binary(16), 'pyarrow.tests.UuidType')
+        super().__init__(pa.binary(16), 'pyarrow.tests.ExampleUuidType')
+
+    def __reduce__(self):
+        return ExampleUuidType, ()
 
     def __arrow_ext_scalar_class__(self):
-        return UuidScalarType
+        return ExampleUuidScalarType
 
     def __arrow_ext_serialize__(self):
         return b''
@@ -116,10 +119,10 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
         return cls()
 
 
-class UuidType2(pa.ExtensionType):
+class ExampleUuidType2(pa.ExtensionType):
 
     def __init__(self):
-        super().__init__(pa.binary(16), 'pyarrow.tests.UuidType2')
+        super().__init__(pa.binary(16), 'pyarrow.tests.ExampleUuidType2')
 
     def __arrow_ext_serialize__(self):
         return b''
@@ -250,8 +253,8 @@ def ipc_read_batch(buf):
 
 
 def test_ext_type_basics():
-    ty = UuidType()
-    assert ty.extension_name == "pyarrow.tests.UuidType"
+    ty = ExampleUuidType()
+    assert ty.extension_name == "pyarrow.tests.ExampleUuidType"
 
 
 def test_ext_type_str():
@@ -267,16 +270,16 @@ def test_ext_type_repr():
 
 
 def test_ext_type_lifetime():
-    ty = UuidType()
+    ty = ExampleUuidType()
     wr = weakref.ref(ty)
     del ty
     assert wr() is None
 
 
 def test_ext_type_storage_type():
-    ty = UuidType()
+    ty = ExampleUuidType()
     assert ty.storage_type == pa.binary(16)
-    assert ty.__class__ is UuidType
+    assert ty.__class__ is ExampleUuidType
     ty = ParamExtType(5)
     assert ty.storage_type == pa.binary(5)
     assert ty.__class__ is ParamExtType
@@ -284,7 +287,7 @@ def test_ext_type_storage_type():
 
 def test_ext_type_byte_width():
     # Test for fixed-size binary types
-    ty = UuidType()
+    ty = pa.uuid()
     assert ty.byte_width == 16
     ty = ParamExtType(5)
     assert ty.byte_width == 5
@@ -297,7 +300,7 @@ def test_ext_type_byte_width():
 
 def test_ext_type_bit_width():
     # Test for fixed-size binary types
-    ty = UuidType()
+    ty = pa.uuid()
     assert ty.bit_width == 128
     ty = ParamExtType(5)
     assert ty.bit_width == 40
@@ -309,7 +312,7 @@ def test_ext_type_bit_width():
 
 
 def test_ext_type_as_py():
-    ty = UuidType()
+    ty = ExampleUuidType()
     expected = uuid4()
     scalar = pa.ExtensionScalar.from_storage(ty, expected.bytes)
     assert scalar.as_py() == expected
@@ -342,12 +345,22 @@ def test_ext_type_as_py():
 
 def test_uuid_type_pickle(pickle_module):
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
-        ty = UuidType()
+        ty = ExampleUuidType()
         ser = pickle_module.dumps(ty, protocol=proto)
         del ty
         ty = pickle_module.loads(ser)
         wr = weakref.ref(ty)
-        assert ty.extension_name == "pyarrow.tests.UuidType"
+        assert ty.extension_name == "pyarrow.tests.ExampleUuidType"
+        del ty
+        assert wr() is None
+
+    for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
+        ty = pa.uuid()
+        ser = pickle_module.dumps(ty, protocol=proto)
+        del ty
+        ty = pickle_module.loads(ser)
+        wr = weakref.ref(ty)
+        assert ty.extension_name == "arrow.uuid"
         del ty
         assert wr() is None
 
@@ -358,8 +371,8 @@ def test_ext_type_equality():
     c = ParamExtType(6)
     assert a != b
     assert b == c
-    d = UuidType()
-    e = UuidType()
+    d = ExampleUuidType()
+    e = ExampleUuidType()
     assert a != d
     assert d == e
 
@@ -403,7 +416,7 @@ def test_ext_array_equality():
     storage1 = pa.array([b"0123456789abcdef"], type=pa.binary(16))
     storage2 = pa.array([b"0123456789abcdef"], type=pa.binary(16))
     storage3 = pa.array([], type=pa.binary(16))
-    ty1 = UuidType()
+    ty1 = ExampleUuidType()
     ty2 = ParamExtType(16)
 
     a = pa.ExtensionArray.from_storage(ty1, storage1)
@@ -451,9 +464,9 @@ def test_ext_scalar_from_array():
     data = [b"0123456789abcdef", b"0123456789abcdef",
             b"zyxwvutsrqponmlk", None]
     storage = pa.array(data, type=pa.binary(16))
-    ty1 = UuidType()
+    ty1 = ExampleUuidType()
     ty2 = ParamExtType(16)
-    ty3 = UuidType2()
+    ty3 = ExampleUuidType2()
 
     a = pa.ExtensionArray.from_storage(ty1, storage)
     b = pa.ExtensionArray.from_storage(ty2, storage)
@@ -462,9 +475,9 @@ def test_ext_scalar_from_array():
     scalars_a = list(a)
     assert len(scalars_a) == 4
 
-    assert ty1.__arrow_ext_scalar_class__() == UuidScalarType
-    assert isinstance(a[0], UuidScalarType)
-    assert isinstance(scalars_a[0], UuidScalarType)
+    assert ty1.__arrow_ext_scalar_class__() == ExampleUuidScalarType
+    assert isinstance(a[0], ExampleUuidScalarType)
+    assert isinstance(scalars_a[0], ExampleUuidScalarType)
 
     for s, val in zip(scalars_a, data):
         assert isinstance(s, pa.ExtensionScalar)
@@ -505,7 +518,7 @@ def test_ext_scalar_from_array():
 
 
 def test_ext_scalar_from_storage():
-    ty = UuidType()
+    ty = ExampleUuidType()
 
     s = pa.ExtensionScalar.from_storage(ty, None)
     assert isinstance(s, pa.ExtensionScalar)
@@ -706,14 +719,14 @@ def test_cast_between_extension_types():
     tiny_int_arr.cast(pa.int64()).cast(IntegerType())
 
     # Between the same extension types is okay
-    array = pa.array([b'1' * 16, b'2' * 16], pa.binary(16)).cast(UuidType())
-    out = array.cast(UuidType())
-    assert out.type == UuidType()
+    array = pa.array([b'1' * 16, b'2' * 16], pa.binary(16)).cast(ExampleUuidType())
+    out = array.cast(ExampleUuidType())
+    assert out.type == ExampleUuidType()
 
     # Will still fail casting between extensions who share storage type,
     # can only cast between exactly the same extension types.
     with pytest.raises(TypeError, match='Casting from *'):
-        array.cast(UuidType2())
+        array.cast(ExampleUuidType2())
 
 
 def test_cast_to_extension_with_extension_storage():
@@ -744,10 +757,10 @@ def test_cast_nested_extension_types(data, type_factory):
 
 def test_casting_dict_array_to_extension_type():
     storage = pa.array([b"0123456789abcdef"], type=pa.binary(16))
-    arr = pa.ExtensionArray.from_storage(UuidType(), storage)
+    arr = pa.ExtensionArray.from_storage(ExampleUuidType(), storage)
     dict_arr = pa.DictionaryArray.from_arrays(pa.array([0, 0], pa.int32()),
                                               arr)
-    out = dict_arr.cast(UuidType())
+    out = dict_arr.cast(ExampleUuidType())
     assert isinstance(out, pa.ExtensionArray)
     assert out.to_pylist() == [UUID('30313233-3435-3637-3839-616263646566'),
                                UUID('30313233-3435-3637-3839-616263646566')]
@@ -1347,7 +1360,7 @@ def test_cpp_extension_in_python(tmpdir):
     mod = __import__('extensions')
 
     uuid_type = mod._make_uuid_type()
-    assert uuid_type.extension_name == "uuid"
+    assert uuid_type.extension_name == "example-uuid"
     assert uuid_type.storage_type == pa.binary(16)
 
     array = mod._make_uuid_array()
@@ -1356,6 +1369,31 @@ def test_cpp_extension_in_python(tmpdir):
     assert array[0].as_py() == b'abcdefghijklmno0'
     assert array[1].as_py() == b'0onmlkjihgfedcba'
 
+    buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["example-uuid"]))
+
+    batch = ipc_read_batch(buf)
+    reconstructed_array = batch.column(0)
+    assert reconstructed_array.type == uuid_type
+    assert reconstructed_array == array
+
+
+def test_uuid_extension():
+    data = [b"0123456789abcdef", b"0123456789abcdef",
+            b"zyxwvutsrqponmlk", None]
+
+    uuid_type = pa.uuid()
+    assert uuid_type.extension_name == "arrow.uuid"
+    assert uuid_type.storage_type == pa.binary(16)
+    assert uuid_type.__class__ is pa.UuidType
+
+    storage = pa.array(data, pa.binary(16))
+    array = pa.ExtensionArray.from_storage(uuid_type, storage)
+    assert array.type == uuid_type
+
+    assert array.to_pylist() == [x if x is None else UUID(bytes=x) for x in data]
+    assert array[0].as_py() == UUID(bytes=data[0])
+    assert array[3].as_py() is None
+
     buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["uuid"]))
 
     batch = ipc_read_batch(buf)
@@ -1363,6 +1401,9 @@ def test_cpp_extension_in_python(tmpdir):
     assert reconstructed_array.type == uuid_type
     assert reconstructed_array == array
 
+    assert uuid_type.__arrow_ext_scalar_class__() == pa.UuidScalar
+    assert isinstance(array[0], pa.UuidScalar)
+
 
 def test_tensor_type():
     tensor_type = pa.fixed_shape_tensor(pa.int8(), [2, 3])
diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py
index 0d12d710dcf64..2ac2f55754fe5 100644
--- a/python/pyarrow/tests/test_gdb.py
+++ b/python/pyarrow/tests/test_gdb.py
@@ -409,7 +409,7 @@ def test_types_stack(gdb_arrow):
 
     check_stack_repr(
         gdb_arrow, "uuid_type",
-        ('arrow::ExtensionType "extension<uuid>" '
+        ('arrow::ExtensionType "extension<arrow.uuid>" '
          'with storage type arrow::fixed_size_binary(16)'))
 
 
@@ -447,7 +447,7 @@ def test_types_heap(gdb_arrow):
 
     check_heap_repr(
         gdb_arrow, "heap_uuid_type",
-        ('arrow::ExtensionType "extension<uuid>" '
+        ('arrow::ExtensionType "extension<arrow.uuid>" '
          'with storage type arrow::fixed_size_binary(16)'))
 
 
@@ -716,12 +716,12 @@ def test_scalars_stack(gdb_arrow):
 
     check_stack_repr(
         gdb_arrow, "extension_scalar",
-        ('arrow::ExtensionScalar of type "extension<uuid>", '
+        ('arrow::ExtensionScalar of type "extension<arrow.uuid>", '
          'value arrow::FixedSizeBinaryScalar of size 16, '
          'value "0123456789abcdef"'))
     check_stack_repr(
         gdb_arrow, "extension_scalar_null",
-        'arrow::ExtensionScalar of type "extension<uuid>", null value')
+        'arrow::ExtensionScalar of type "extension<arrow.uuid>", null value')
 
 
 def test_scalars_heap(gdb_arrow):
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 563782f0c2643..f83ecc3aa4326 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1765,6 +1765,25 @@ cdef class ExtensionType(BaseExtensionType):
         return ExtensionScalar
 
 
+cdef class UuidType(BaseExtensionType):
+    """
+    Concrete class for UUID extension type.
+    """
+
+    cdef void init(self, const shared_ptr[CDataType]& type) except *:
+        BaseExtensionType.init(self, type)
+        self.uuid_ext_type = <const CUuidType*> type.get()
+
+    def __arrow_ext_class__(self):
+        return UuidArray
+
+    def __reduce__(self):
+        return uuid, ()
+
+    def __arrow_ext_scalar_class__(self):
+        return UuidScalar
+
+
 cdef class FixedShapeTensorType(BaseExtensionType):
     """
     Concrete class for fixed shape tensor extension type.
@@ -5208,6 +5227,21 @@ def run_end_encoded(run_end_type, value_type):
     return pyarrow_wrap_data_type(ree_type)
 
 
+def uuid():
+    """
+    Create UuidType instance.
+
+    Returns
+    -------
+    type : UuidType
+    """
+
+    cdef UuidType out = UuidType.__new__(UuidType)
+    c_uuid_ext_type = GetResultValue(CUuidType.Make())
+    out.init(c_uuid_ext_type)
+    return out
+
+
 def fixed_shape_tensor(DataType value_type, shape, dim_names=None, permutation=None):
     """
     Create instance of fixed shape tensor extension type with shape and optional

From 8eb7bd4115da0027aad6362f0fe0901ec44b0616 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 27 Aug 2024 09:12:57 +0900
Subject: [PATCH 17/63] MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.24.1
 to 2.25.0 in /go (#43829)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [github.com/hamba/avro/v2](https://github.com/hamba/avro) from 2.24.1 to 2.25.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/hamba/avro/releases">github.com/hamba/avro/v2's releases</a>.</em></p>
<blockquote>
<h2>v2.25.0</h2>
<h2>What's Changed</h2>
<ul>
<li>chore: bump golang.org/x/tools from 0.23.0 to 0.24.0 in the all group by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/hamba/avro/pull/431">hamba/avro#431</a></li>
<li>feat: support custom logical types by <a href="https://github.com/Emptyless"><code>@​Emptyless</code></a> in <a href="https://redirect.github.com/hamba/avro/pull/429">hamba/avro#429</a></li>
<li>chore: support go 1.23 by <a href="https://github.com/nrwiersma"><code>@​nrwiersma</code></a> in <a href="https://redirect.github.com/hamba/avro/pull/433">hamba/avro#433</a></li>
<li>docs: add who use case by <a href="https://github.com/haoxins"><code>@​haoxins</code></a> in <a href="https://redirect.github.com/hamba/avro/pull/434">hamba/avro#434</a></li>
<li>chore: update decoder.go by <a href="https://github.com/kasperlewau"><code>@​kasperlewau</code></a> in <a href="https://redirect.github.com/hamba/avro/pull/436">hamba/avro#436</a></li>
<li>fix: ref already seen schemas in deref walk by <a href="https://github.com/nrwiersma"><code>@​nrwiersma</code></a> in <a href="https://redirect.github.com/hamba/avro/pull/438">hamba/avro#438</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/Emptyless"><code>@​Emptyless</code></a> made their first contribution in <a href="https://redirect.github.com/hamba/avro/pull/429">hamba/avro#429</a></li>
<li><a href="https://github.com/kasperlewau"><code>@​kasperlewau</code></a> made their first contribution in <a href="https://redirect.github.com/hamba/avro/pull/436">hamba/avro#436</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/hamba/avro/compare/v2.24.1...v2.25.0">https://github.com/hamba/avro/compare/v2.24.1...v2.25.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/hamba/avro/commit/c2ac60e27f027cecefd33df085cd7dd13ed2b4f5"><code>c2ac60e</code></a> fix: ref already seen schemas in deref walk (<a href="https://redirect.github.com/hamba/avro/issues/438">#438</a>)</li>
<li><a href="https://github.com/hamba/avro/commit/917a77ee07e81c28cc010d261115b8f86b7be234"><code>917a77e</code></a> chore: update decoder.go (<a href="https://redirect.github.com/hamba/avro/issues/436">#436</a>)</li>
<li><a href="https://github.com/hamba/avro/commit/3a276f315d119ea178a75b61e2ddf1d1d425f3bd"><code>3a276f3</code></a> docs: Add who use case (<a href="https://redirect.github.com/hamba/avro/issues/434">#434</a>)</li>
<li><a href="https://github.com/hamba/avro/commit/fdb7050201e160aaeacc303f1b4d26111f1d81c9"><code>fdb7050</code></a> chore: support go 1.23, remove go 1.21 (<a href="https://redirect.github.com/hamba/avro/issues/433">#433</a>)</li>
<li><a href="https://github.com/hamba/avro/commit/51c1d356a5f16b0d16de083d7811be8e6c92e66f"><code>51c1d35</code></a> feat: support custom logical types (<a href="https://redirect.github.com/hamba/avro/issues/429">#429</a>)</li>
<li><a href="https://github.com/hamba/avro/commit/2623a40ea9178daaf1ec6876c0906029bcf83827"><code>2623a40</code></a> chore: bump golang.org/x/tools from 0.23.0 to 0.24.0 in the all group (<a href="https://redirect.github.com/hamba/avro/issues/431">#431</a>)</li>
<li>See full diff in <a href="https://github.com/hamba/avro/compare/v2.24.1...v2.25.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/hamba/avro/v2&package-manager=go_modules&previous-version=2.24.1&new-version=2.25.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 go/go.mod | 2 +-
 go/go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go/go.mod b/go/go.mod
index 9f4222a541bb6..97ac05685970c 100644
--- a/go/go.mod
+++ b/go/go.mod
@@ -47,7 +47,7 @@ require (
 
 require (
 	github.com/google/uuid v1.6.0
-	github.com/hamba/avro/v2 v2.24.1
+	github.com/hamba/avro/v2 v2.25.0
 	github.com/huandu/xstrings v1.4.0
 	github.com/substrait-io/substrait-go v0.6.0
 	github.com/tidwall/sjson v1.2.5
diff --git a/go/go.sum b/go/go.sum
index c7eb3a66deeec..bd761e1589453 100644
--- a/go/go.sum
+++ b/go/go.sum
@@ -43,8 +43,8 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu
 github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/hamba/avro/v2 v2.24.1 h1:Xi+7AnhaAc41aA/jmmYpxMsdEDOf1rdup6NJ85P7q2I=
-github.com/hamba/avro/v2 v2.24.1/go.mod h1:7vDfy/2+kYCE8WUHoj2et59GTv0ap7ptktMXu0QHePI=
+github.com/hamba/avro/v2 v2.25.0 h1:9qig/K4VP5tMq6DuKGfI6YdXncTkPJT1IJDMSv82EeI=
+github.com/hamba/avro/v2 v2.25.0/go.mod h1:I8glyswHnpED3Nlx2ZdUe+4LJnCOOyiCzLMno9i/Uu0=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
 github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=

From 93c5ddb957bb93421a8f84dbd7c5a5b7be2d6d45 Mon Sep 17 00:00:00 2001
From: PANKAJ9768 <48675737+PANKAJ9768@users.noreply.github.com>
Date: Tue, 27 Aug 2024 05:59:09 +0530
Subject: [PATCH 18/63] GH-43667: [Java] Keeping Flight default header size
 consistent between server and client (#43697)

### Rationale for this change

### What changes are included in this PR?
A Flight client can send headers larger than the server accepts by default. This PR keeps the default header size limits consistent between server and client.

### Are these changes tested?

### Are there any user-facing changes?

* GitHub Issue: #43667

Authored-by: pankaj kesari <pankaj.kesari99@yahoo.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../org/apache/arrow/flight/FlightServer.java |  7 ++
 .../arrow/flight/TestFlightService.java       | 73 +++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java
index 05dbe42c49172..ac761457f57fd 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java
@@ -188,6 +188,7 @@ public static final class Builder {
     private CallHeaderAuthenticator headerAuthenticator = CallHeaderAuthenticator.NO_OP;
     private ExecutorService executor = null;
     private int maxInboundMessageSize = MAX_GRPC_MESSAGE_SIZE;
+    private int maxHeaderListSize = MAX_GRPC_MESSAGE_SIZE;
     private int backpressureThreshold = DEFAULT_BACKPRESSURE_THRESHOLD;
     private InputStream certChain;
     private InputStream key;
@@ -324,6 +325,7 @@ public FlightServer build() {
       builder
           .executor(exec)
           .maxInboundMessageSize(maxInboundMessageSize)
+          .maxInboundMetadataSize(maxHeaderListSize)
           .addService(
               ServerInterceptors.intercept(
                   flightService,
@@ -366,6 +368,11 @@ public FlightServer build() {
       return new FlightServer(location, builder.build(), grpcExecutor);
     }
 
+    public Builder setMaxHeaderListSize(int maxHeaderListSize) {
+      this.maxHeaderListSize = maxHeaderListSize;
+      return this;
+    }
+
     /**
      * Set the maximum size of a message. Defaults to "unlimited", depending on the underlying
      * transport.
diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java
index 5ebeb44c1d36e..fc3f83e4eafd3 100644
--- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java
+++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java
@@ -27,6 +27,7 @@
 import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.Optional;
+import java.util.Random;
 import org.apache.arrow.flight.impl.Flight;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
@@ -152,4 +153,76 @@ public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor
       assertEquals("No schema is present in FlightInfo", e.getMessage());
     }
   }
+
+  /**
+   * Test for GH-41584: Flight default header sizes were out of sync between client and server.
+   */
+  @Test
+  public void testHeaderSizeExchangeInService() throws Exception {
+    final FlightProducer producer =
+        new NoOpFlightProducer() {
+          @Override
+          public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) {
+            String longHeader =
+                context.getMiddleware(FlightConstants.HEADER_KEY).headers().get("long-header");
+            return new FlightInfo(
+                null,
+                descriptor,
+                Collections.emptyList(),
+                0,
+                0,
+                false,
+                IpcOption.DEFAULT,
+                longHeader.getBytes(StandardCharsets.UTF_8));
+          }
+        };
+
+    String headerVal = generateRandom(1024 * 10);
+    FlightCallHeaders callHeaders = new FlightCallHeaders();
+    callHeaders.insert("long-header", headerVal);
+    // server with the default header limit, same as the client
+    try (final FlightServer s =
+            FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer)
+                .build()
+                .start();
+        final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+      FlightInfo flightInfo =
+          client.getInfo(FlightDescriptor.path("test"), new HeaderCallOption(callHeaders));
+      assertEquals(Optional.empty(), flightInfo.getSchemaOptional());
+      assertEquals(new Schema(Collections.emptyList()), flightInfo.getSchema());
+      assertArrayEquals(flightInfo.getAppMetadata(), headerVal.getBytes(StandardCharsets.UTF_8));
+    }
+    // server with 15kb header limit
+    try (final FlightServer s =
+            FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer)
+                .setMaxHeaderListSize(1024 * 15)
+                .build()
+                .start();
+        final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+      FlightInfo flightInfo =
+          client.getInfo(FlightDescriptor.path("test"), new HeaderCallOption(callHeaders));
+      assertEquals(Optional.empty(), flightInfo.getSchemaOptional());
+      assertEquals(new Schema(Collections.emptyList()), flightInfo.getSchema());
+      assertArrayEquals(flightInfo.getAppMetadata(), headerVal.getBytes(StandardCharsets.UTF_8));
+
+      callHeaders.insert("another-header", headerVal + headerVal);
+      FlightRuntimeException e =
+          assertThrows(
+              FlightRuntimeException.class,
+              () ->
+                  client.getInfo(FlightDescriptor.path("test"), new HeaderCallOption(callHeaders)));
+      assertEquals("http2 exception", e.getMessage());
+    }
+  }
+
+  private static String generateRandom(int size) {
+    String aToZ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
+    Random random = new Random();
+    StringBuilder res = new StringBuilder();
+    for (int i = 0; i < size; i++) {
+      int randIndex = random.nextInt(aToZ.length());
+      res.append(aToZ.charAt(randIndex));
+    }
+    return res.toString();
+  }
 }

From 11f92491b1d2ecf700e6e023a1e413ec4c4345ae Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 27 Aug 2024 11:06:13 +0900
Subject: [PATCH 19/63] MINOR: [Go] Bump github.com/substrait-io/substrait-go
 from 0.6.0 to 0.7.0 in /go (#43830)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [github.com/substrait-io/substrait-go](https://github.com/substrait-io/substrait-go) from 0.6.0 to 0.7.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/substrait-io/substrait-go/releases">github.com/substrait-io/substrait-go's releases</a>.</em></p>
<blockquote>
<h1>v0.7.0 (2024-08-25)</h1>
<h3>Features</h3>
<ul>
<li>Add convenience literal APIs (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/47">#47</a>) (<a href="https://github.com/substrait-io/substrait-go/commit/597afdb7059171990014b357fa5b0865428c034f">597afdb</a>)
<blockquote>
<ul>
<li>Introduce literal package</li>
</ul>
<hr />
</blockquote>
</li>
</ul>
<h3>Changes to the build process or auxiliary tools and libraries such as documentation generation</h3>
<ul>
<li><strong><code>extensions</code></strong> Minor refactoring in extension_mgr.go (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/45">#45</a>) (<a href="https://github.com/substrait-io/substrait-go/commit/cbd28cb19499af1923484ec82540350528249075">cbd28cb</a>)
<blockquote>
<ul>
<li>Minor refactoring in extension_mgr.go</li>
</ul>
</blockquote>
</li>
<li>Move typeName maps to types package (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/46">#46</a>) (<a href="https://github.com/substrait-io/substrait-go/commit/5556c236d4fce79681d3c9e7db9b543a8e4245ce">5556c23</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/substrait-io/substrait-go/commit/597afdb7059171990014b357fa5b0865428c034f"><code>597afdb</code></a> feat: Add convenience literal APIs (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/47">#47</a>)</li>
<li><a href="https://github.com/substrait-io/substrait-go/commit/e77df6728b1f9499d2f650a927074ffc1354a5df"><code>e77df67</code></a> feat(types) Make time precision value explicit (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/49">#49</a>)</li>
<li><a href="https://github.com/substrait-io/substrait-go/commit/a3e8ee0724d42061f76fe5c64eaece37ca468c8c"><code>a3e8ee0</code></a> feat(substrait) Update to substrait v0.55.0 (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/48">#48</a>)</li>
<li><a href="https://github.com/substrait-io/substrait-go/commit/2229c12e14ac23f631c19e9e8001d826715dccef"><code>2229c12</code></a> ci(build-test): golangci should use the go.mod version of golang (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/51">#51</a>)</li>
<li><a href="https://github.com/substrait-io/substrait-go/commit/cbd28cb19499af1923484ec82540350528249075"><code>cbd28cb</code></a> chore(extensions): Minor refactoring in extension_mgr.go (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/45">#45</a>)</li>
<li><a href="https://github.com/substrait-io/substrait-go/commit/5556c236d4fce79681d3c9e7db9b543a8e4245ce"><code>5556c23</code></a> chore: Move typeName maps to types package (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/46">#46</a>)</li>
<li><a href="https://github.com/substrait-io/substrait-go/commit/dd790cb46265074e7737d102675f790dbb3f2e56"><code>dd790cb</code></a> Add a function registry for a given BFT dialect  (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/32">#32</a>)</li>
<li><a href="https://github.com/substrait-io/substrait-go/commit/828636c51ea752cf7a34aa18e3336ac2c43fe3f4"><code>828636c</code></a> ci(build-test): Add golangci-lint to do import checking and other linting (<a href="https://redirect.github.com/substrait-io/substrait-go/issues/42">#42</a>)</li>
<li>See full diff in <a href="https://github.com/substrait-io/substrait-go/compare/v0.6.0...v0.7.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/substrait-io/substrait-go&package-manager=go_modules&previous-version=0.6.0&new-version=0.7.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 go/go.mod | 2 +-
 go/go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go/go.mod b/go/go.mod
index 97ac05685970c..a995eee24d563 100644
--- a/go/go.mod
+++ b/go/go.mod
@@ -49,7 +49,7 @@ require (
 	github.com/google/uuid v1.6.0
 	github.com/hamba/avro/v2 v2.25.0
 	github.com/huandu/xstrings v1.4.0
-	github.com/substrait-io/substrait-go v0.6.0
+	github.com/substrait-io/substrait-go v0.7.0
 	github.com/tidwall/sjson v1.2.5
 )
 
diff --git a/go/go.sum b/go/go.sum
index bd761e1589453..6f22e11aef03a 100644
--- a/go/go.sum
+++ b/go/go.sum
@@ -99,8 +99,8 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
-github.com/substrait-io/substrait-go v0.6.0 h1:n2G/SGmrn7U5Q39VA8WeM2UfVL5Y/6HX8WAP9uJLNk4=
-github.com/substrait-io/substrait-go v0.6.0/go.mod h1:cl8Wsc7aBPDfcHp9+OrUqGpjkgrYlhcDsH/lMP6KUZA=
+github.com/substrait-io/substrait-go v0.7.0 h1:53yi73t4wW383+RD1YuhXhbjhP1KzF9GCxPC7SsRlqc=
+github.com/substrait-io/substrait-go v0.7.0/go.mod h1:7mjSvIaxk94bOF+YZn/vBOpHK4DWTpBv7nC/btjXCmc=
 github.com/tidwall/gjson v1.14.2 h1:6BBkirS0rAHjumnjHF6qgy5d2YAJ1TLIaFE2lzfOLqo=
 github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=

From a49493d96bc3021af1a126ce33f859bfb7a2ec80 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 27 Aug 2024 11:44:19 +0900
Subject: [PATCH 20/63] MINOR: [Java] Downgrade gRPC to 1.65 (#43839)

### Rationale for this change

Newer gRPC versions don't run in all CI pipelines due to protoc using a newer glibc.

### What changes are included in this PR?

This reverts commit 4af1e491df7ac22217656668b65c3e8d55f5b5ab.

### Are these changes tested?

N/A

### Are there any user-facing changes?

No

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/pom.xml b/java/pom.xml
index 77feed12f3f1d..f78d02c0c650f 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -97,7 +97,7 @@ under the License.
     <dep.slf4j.version>2.0.16</dep.slf4j.version>
     <dep.guava-bom.version>33.2.1-jre</dep.guava-bom.version>
     <dep.netty-bom.version>4.1.112.Final</dep.netty-bom.version>
-    <dep.grpc-bom.version>1.66.0</dep.grpc-bom.version>
+    <dep.grpc-bom.version>1.65.0</dep.grpc-bom.version>
     <dep.protobuf-bom.version>3.25.4</dep.protobuf-bom.version>
     <dep.jackson-bom.version>2.17.2</dep.jackson-bom.version>
     <dep.hadoop.version>3.4.0</dep.hadoop.version>

From 23fe1ce3361b9a6825fea77deb20d0bd7f247fe2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 27 Aug 2024 11:56:45 +0900
Subject: [PATCH 21/63] MINOR: [Java] Bump org.apache.commons:commons-compress
 from 1.27.0 to 1.27.1 in /java (#43826)

Bumps org.apache.commons:commons-compress from 1.27.0 to 1.27.1.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.commons:commons-compress&package-manager=maven&previous-version=1.27.0&new-version=1.27.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/compression/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index a1f2bc861da1f..46ed8796423eb 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -50,7 +50,7 @@ under the License.
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-compress</artifactId>
-      <version>1.27.0</version>
+      <version>1.27.1</version>
     </dependency>
     <dependency>
       <groupId>com.github.luben</groupId>

From fa5d158282b316819e4e23e0903b696467a61d38 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 26 Aug 2024 21:01:45 -0700
Subject: [PATCH 22/63] MINOR: [C#] Bump Microsoft.NET.Test.Sdk from 17.10.0 to
 17.11.0 in /csharp (#43822)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [Microsoft.NET.Test.Sdk](https://github.com/microsoft/vstest) from 17.10.0 to 17.11.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/microsoft/vstest/releases">Microsoft.NET.Test.Sdk's releases</a>.</em></p>
<blockquote>
<h2>v17.11.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Add reference to the AdapterUtilities library in the spec docs. by <a href="https://github.com/peterwald"><code>@​peterwald</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4958">microsoft/vstest#4958</a></li>
<li>Stack trace when localized, and new messages by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4944">microsoft/vstest#4944</a></li>
<li>Fix single quote and space in F# pretty methods by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4969">microsoft/vstest#4969</a></li>
<li>Update .NET runtimes to latest patch version by <a href="https://github.com/Evangelink"><code>@​Evangelink</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4975">microsoft/vstest#4975</a></li>
<li>Update dotnetcoretests.md by <a href="https://github.com/DickBaker"><code>@​DickBaker</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4977">microsoft/vstest#4977</a></li>
<li>Add list of known TestingPlatform dlls by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4983">microsoft/vstest#4983</a></li>
<li>Update framework version used for testing, and test matrix by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4970">microsoft/vstest#4970</a></li>
<li>Add output forwarding for .NET by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4988">microsoft/vstest#4988</a></li>
<li>Remove usage of pt images before decomissioning by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4994">microsoft/vstest#4994</a></li>
<li>chore: Add more details to acquistion section. by <a href="https://github.com/voroninp"><code>@​voroninp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4999">microsoft/vstest#4999</a></li>
<li>Simplify banner by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5013">microsoft/vstest#5013</a></li>
<li>Forward standard output of testhost by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/4998">microsoft/vstest#4998</a></li>
<li>Add missing copyright header by <a href="https://github.com/MichaelSimons"><code>@​MichaelSimons</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5020">microsoft/vstest#5020</a></li>
<li>Add option to not share .NET Framework testhosts by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5018">microsoft/vstest#5018</a></li>
<li>GetTypesToLoad Attribute cant be null by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5054">microsoft/vstest#5054</a></li>
<li>rawArgument in GetArgumentList cant be null by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5056">microsoft/vstest#5056</a></li>
<li>fix Atribute typo by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5057">microsoft/vstest#5057</a></li>
<li>remove unnecessary list alloc for 2 scenarios in TestRequestManager.GetSources by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5058">microsoft/vstest#5058</a></li>
<li>fix incompatiblity typo by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5059">microsoft/vstest#5059</a></li>
<li>remove redundant inline method in IsPlatformIncompatible by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5060">microsoft/vstest#5060</a></li>
<li>fix Sucess typo by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5061">microsoft/vstest#5061</a></li>
<li>use some null coalescing by <a href="https://github.com/SimonCropp"><code>@​SimonCropp</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5062">microsoft/vstest#5062</a></li>
<li>Add cts into friends of TranslationLayer by <a href="https://github.com/jakubch1"><code>@​jakubch1</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5075">microsoft/vstest#5075</a></li>
<li>Use built in sha1 for id generation by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5081">microsoft/vstest#5081</a></li>
<li>All output in terminal logger by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5083">microsoft/vstest#5083</a></li>
<li>Ignore env test by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5095">microsoft/vstest#5095</a></li>
<li>Dispose XmlReader in XmlRunSettingsUtilities by <a href="https://github.com/omajid"><code>@​omajid</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5094">microsoft/vstest#5094</a></li>
<li>Bump to macos-12 build image by <a href="https://github.com/akoeplinger"><code>@​akoeplinger</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5101">microsoft/vstest#5101</a></li>
<li>Handle ansi escape in terminal logger reporter by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5084">microsoft/vstest#5084</a></li>
<li>remove disable interactive auth by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5110">microsoft/vstest#5110</a></li>
<li>Error output as info in terminal logger by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5113">microsoft/vstest#5113</a></li>
<li>Write dll instead of target on abort, rename errors by <a href="https://github.com/nohwnd"><code>@​nohwnd</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5115">microsoft/vstest#5115</a></li>
<li>
<ul>
<li>[rel/17.11] Update dependencies from devdiv/DevDiv/vs-code-coverage by <a href="https://github.com/dotnet-maestro"><code>@​dotnet-maestro</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5152">microsoft/vstest#5152</a></li>
</ul>
</li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/peterwald"><code>@​peterwald</code></a> made their first contribution in <a href="https://redirect.github.com/microsoft/vstest/pull/4958">microsoft/vstest#4958</a></li>
<li><a href="https://github.com/DickBaker"><code>@​DickBaker</code></a> made their first contribution in <a href="https://redirect.github.com/microsoft/vstest/pull/4977">microsoft/vstest#4977</a></li>
<li><a href="https://github.com/voroninp"><code>@​voroninp</code></a> made their first contribution in <a href="https://redirect.github.com/microsoft/vstest/pull/4999">microsoft/vstest#4999</a></li>
<li><a href="https://github.com/akoeplinger"><code>@​akoeplinger</code></a> made their first contribution in <a href="https://redirect.github.com/microsoft/vstest/pull/5101">microsoft/vstest#5101</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/microsoft/vstest/compare/v17.10.0...v17.11.0-release-24352-06">https://github.com/microsoft/vstest/compare/v17.10.0...v17.11.0-release-24352-06</a></p>
<h2>v17.11.0-release-24373-02</h2>
<h2>What's Changed</h2>
<ul>
<li>[rel/17.11] Update dependencies from devdiv/DevDiv/vs-code-coverage by <a href="https://github.com/dotnet-maestro"><code>@​dotnet-maestro</code></a> in <a href="https://redirect.github.com/microsoft/vstest/pull/5152">microsoft/vstest#5152</a></li>
</ul>

</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/microsoft/vstest/commit/c6ad3e3fa4120fb32c8a48bab4fa478adfdb2740"><code>c6ad3e3</code></a> Update dependencies from <a href="https://dev.azure.com/devdiv/DevDiv/_git/vs-code-cov">https://dev.azure.com/devdiv/DevDiv/_git/vs-code-cov</a>...</li>
<li><a href="https://github.com/microsoft/vstest/commit/910ca0dcc779068418464794f5af570eda195222"><code>910ca0d</code></a> Fix output based test (<a href="https://redirect.github.com/microsoft/vstest/issues/5131">#5131</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/0518ceaee8e9b3689ebf0de5f250eb2a2e9de1c1"><code>0518cea</code></a> Rebrand to 17.11-release (<a href="https://redirect.github.com/microsoft/vstest/issues/5128">#5128</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/5b69fd31169dd07ced917329bbb483f3b73ea98f"><code>5b69fd3</code></a> Write dll instead of target on abort, rename errors (<a href="https://redirect.github.com/microsoft/vstest/issues/5115">#5115</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/7264afa0720d846bc4d64efaf5ebe9587e071ca7"><code>7264afa</code></a> Error output as info in terminal logger (<a href="https://redirect.github.com/microsoft/vstest/issues/5113">#5113</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/3d2ea06c998a002f640b01f5d84fdefb14167502"><code>3d2ea06</code></a> remove disable interactive auth (<a href="https://redirect.github.com/microsoft/vstest/issues/5110">#5110</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/e4da2c15416e898d665f2b41bd3939b49e20859a"><code>e4da2c1</code></a> Add option to ignore tests (<a href="https://redirect.github.com/microsoft/vstest/issues/5109">#5109</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/6b3b95952d3e8c31259536fe2d7d2c0530a90347"><code>6b3b959</code></a> Ignore dump failing test while I investigate (<a href="https://redirect.github.com/microsoft/vstest/issues/5107">#5107</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/24b992fda379b2443b8b986d1c146df4d7d7e14d"><code>24b992f</code></a> Ignore dispose error (<a href="https://redirect.github.com/microsoft/vstest/issues/5105">#5105</a>)</li>
<li><a href="https://github.com/microsoft/vstest/commit/bfdaf0bfd7727b509c6f509c4736ee9d685c794b"><code>bfdaf0b</code></a> Object disposed flaky (<a href="https://redirect.github.com/microsoft/vstest/issues/5104">#5104</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/microsoft/vstest/compare/v17.10.0...v17.11.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Microsoft.NET.Test.Sdk&package-manager=nuget&previous-version=17.10.0&new-version=17.11.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher <curt@hagenlocher.org>
---
 .../Apache.Arrow.Compression.Tests.csproj                       | 2 +-
 .../Apache.Arrow.Flight.Sql.Tests.csproj                        | 2 +-
 .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj  | 2 +-
 csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
index 047cdb94b963e..4ea02e0ed21c0 100644
--- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
@@ -7,7 +7,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.0" />
     <PackageReference Include="xunit" Version="2.9.0" />
     <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" />
   </ItemGroup>
diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
index dc95f9edf9f7f..fd8274230ec64 100644
--- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
@@ -6,7 +6,7 @@
     </PropertyGroup>
 
     <ItemGroup>
-      <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
+      <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.0" />
       <PackageReference Include="xunit" Version="2.9.0" />
       <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" />
       <PackageReference Include="coverlet.collector" Version="6.0.2" />
diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
index e68a97670cc7e..eae9ab746f283 100644
--- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
@@ -6,7 +6,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.0" />
     <PackageReference Include="xunit" Version="2.9.0" />
     <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" />
     <PackageReference Include="coverlet.collector" Version="6.0.2" />
diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
index f05338313063c..ee71b203218f8 100644
--- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
@@ -16,7 +16,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.0" />
     <PackageReference Include="xunit" Version="2.9.0" />
     <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
       <PrivateAssets>all</PrivateAssets>

From c30bb6a84536d66bc1179e2a051915d5c34b2616 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 27 Aug 2024 14:49:45 +0900
Subject: [PATCH 23/63] GH-41056: [GLib][FlightRPC] Add
 gaflight_client_do_put() and related APIs (#43813)

### Rationale for this change

DoPut is needed to upload data.

### What changes are included in this PR?

* Add `gaflight_client_do_put()`
* Add `GAFlightStreamWriter`
* Add `GAFlightMetadataReader`
* Add `GAFlightDoPutResult`
* Fix `GAFlightRecordBatchWriter` API

### Are these changes tested?

No. They aren't tested yet. We will add tests when we implement server-side DoPut.

### Are there any user-facing changes?

Yes.
* GitHub Issue: #41056

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/arrow-flight-glib/client.cpp | 337 +++++++++++++++++++++++++++-
 c_glib/arrow-flight-glib/client.h   |  46 ++++
 c_glib/arrow-flight-glib/client.hpp |  16 ++
 c_glib/arrow-flight-glib/common.cpp | 102 ++-------
 c_glib/arrow-flight-glib/common.h   |   8 +-
 c_glib/arrow-glib/writer.hpp        |   4 +
 6 files changed, 421 insertions(+), 92 deletions(-)

diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp
index 80c47e336f872..23f59c9da69ad 100644
--- a/c_glib/arrow-flight-glib/client.cpp
+++ b/c_glib/arrow-flight-glib/client.cpp
@@ -33,10 +33,19 @@ G_BEGIN_DECLS
  * #GAFlightStreamReader is a class for reading record batches from a
  * server.
  *
+ * #GAFlightStreamWriter is a class for writing record batches to a
+ * server.
+ *
+ * #GAFlightMetadataReader is a class for reading metadata from a
+ * server.
+ *
  * #GAFlightCallOptions is a class for options of each call.
  *
  * #GAFlightClientOptions is a class for options of each client.
  *
+ * #GAFlightDoPutResult is a class that has gaflight_client_do_put()
+ * result.
+ *
  * #GAFlightClient is a class for Apache Arrow Flight client.
  *
  * Since: 5.0.0
@@ -56,6 +65,128 @@ gaflight_stream_reader_class_init(GAFlightStreamReaderClass *klass)
 {
 }
 
+G_DEFINE_TYPE(GAFlightStreamWriter,
+              gaflight_stream_writer,
+              GAFLIGHT_TYPE_RECORD_BATCH_WRITER)
+
+static void
+gaflight_stream_writer_init(GAFlightStreamWriter *object)
+{
+}
+
+static void
+gaflight_stream_writer_class_init(GAFlightStreamWriterClass *klass)
+{
+}
+
+/**
+ * gaflight_stream_writer_done_writing:
+ * @writer: A #GAFlightStreamWriter.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error)
+{
+  auto flight_writer = std::static_pointer_cast<arrow::flight::FlightStreamWriter>(
+    garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer)));
+  return garrow::check(error,
+                       flight_writer->DoneWriting(),
+                       "[flight-stream-writer][done-writing]");
+}
+
+struct GAFlightMetadataReaderPrivate
+{
+  arrow::flight::FlightMetadataReader *reader;
+};
+
+enum {
+  PROP_METADATA_READER_READER = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightMetadataReader,
+                           gaflight_metadata_reader,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_METADATA_READER_GET_PRIVATE(object)                                     \
+  static_cast<GAFlightMetadataReaderPrivate *>(                                          \
+    gaflight_metadata_reader_get_instance_private(GAFLIGHT_METADATA_READER(object)))
+
+static void
+gaflight_metadata_reader_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object);
+  delete priv->reader;
+  G_OBJECT_CLASS(gaflight_metadata_reader_parent_class)->finalize(object);
+}
+
+static void
+gaflight_metadata_reader_set_property(GObject *object,
+                                      guint prop_id,
+                                      const GValue *value,
+                                      GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_METADATA_READER_READER:
+    priv->reader =
+      static_cast<arrow::flight::FlightMetadataReader *>(g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_metadata_reader_init(GAFlightMetadataReader *object)
+{
+}
+
+static void
+gaflight_metadata_reader_class_init(GAFlightMetadataReaderClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_metadata_reader_finalize;
+  gobject_class->set_property = gaflight_metadata_reader_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer(
+    "reader",
+    nullptr,
+    nullptr,
+    static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_METADATA_READER_READER, spec);
+}
+
+/**
+ * gaflight_metadata_reader_read:
+ * @reader: A #GAFlightMetadataReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): The metadata on success, %NULL on error.
+ *
+ * Since: 18.0.0
+ */
+GArrowBuffer *
+gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error)
+{
+  auto flight_reader = gaflight_metadata_reader_get_raw(reader);
+  std::shared_ptr<arrow::Buffer> metadata;
+  if (garrow::check(error,
+                    flight_reader->ReadMetadata(&metadata),
+                    "[flight-metadata-reader][read]")) {
+    return garrow_buffer_new_raw(&metadata);
+  } else {
+    return nullptr;
+  }
+}
+
 typedef struct GAFlightCallOptionsPrivate_
 {
   arrow::flight::FlightCallOptions options;
@@ -385,6 +516,137 @@ gaflight_client_options_new(void)
     g_object_new(GAFLIGHT_TYPE_CLIENT_OPTIONS, NULL));
 }
 
+struct GAFlightDoPutResultPrivate
+{
+  GAFlightStreamWriter *writer;
+  GAFlightMetadataReader *reader;
+};
+
+enum {
+  PROP_DO_PUT_RESULT_RESULT = 1,
+  PROP_DO_PUT_RESULT_WRITER,
+  PROP_DO_PUT_RESULT_READER,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightDoPutResult, gaflight_do_put_result, G_TYPE_OBJECT)
+
+#define GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object)                                       \
+  static_cast<GAFlightDoPutResultPrivate *>(                                             \
+    gaflight_do_put_result_get_instance_private(GAFLIGHT_DO_PUT_RESULT(object)))
+
+static void
+gaflight_do_put_result_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object);
+
+  if (priv->writer) {
+    g_object_unref(priv->writer);
+    priv->writer = nullptr;
+  }
+
+  if (priv->reader) {
+    g_object_unref(priv->reader);
+    priv->reader = nullptr;
+  }
+
+  G_OBJECT_CLASS(gaflight_do_put_result_parent_class)->dispose(object);
+}
+
+static void
+gaflight_do_put_result_init(GAFlightDoPutResult *object)
+{
+}
+
+static void
+gaflight_do_put_result_set_property(GObject *object,
+                                    guint prop_id,
+                                    const GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DO_PUT_RESULT_RESULT:
+    {
+      auto result = static_cast<arrow::flight::FlightClient::DoPutResult *>(
+        g_value_get_pointer(value));
+      priv->writer = gaflight_stream_writer_new_raw(result->writer.release());
+      priv->reader = gaflight_metadata_reader_new_raw(result->reader.release());
+      break;
+    }
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_do_put_result_get_property(GObject *object,
+                                    guint prop_id,
+                                    GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DO_PUT_RESULT_WRITER:
+    g_value_set_object(value, priv->writer);
+    break;
+  case PROP_DO_PUT_RESULT_READER:
+    g_value_set_object(value, priv->reader);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_do_put_result_class_init(GAFlightDoPutResultClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_do_put_result_dispose;
+  gobject_class->set_property = gaflight_do_put_result_set_property;
+  gobject_class->get_property = gaflight_do_put_result_get_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer(
+    "result",
+    nullptr,
+    nullptr,
+    static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_RESULT, spec);
+
+  /**
+   * GAFlightDoPutResult:writer:
+   *
+   * A writer to write record batches to.
+   *
+   * Since: 18.0.0
+   */
+  spec = g_param_spec_object("writer",
+                             nullptr,
+                             nullptr,
+                             GAFLIGHT_TYPE_STREAM_WRITER,
+                             static_cast<GParamFlags>(G_PARAM_READABLE));
+  g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_WRITER, spec);
+
+  /**
+   * GAFlightDoPutResult:reader:
+   *
+   * A reader for application metadata from the server.
+   *
+   * Since: 18.0.0
+   */
+  spec = g_param_spec_object("reader",
+                             nullptr,
+                             nullptr,
+                             GAFLIGHT_TYPE_METADATA_READER,
+                             static_cast<GParamFlags>(G_PARAM_READABLE));
+  g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_READER, spec);
+}
+
 struct GAFlightClientPrivate
 {
   std::shared_ptr<arrow::flight::FlightClient> client;
@@ -661,6 +923,51 @@ gaflight_client_do_get(GAFlightClient *client,
   return gaflight_stream_reader_new_raw(flight_reader.release(), TRUE);
 }
 
+/**
+ * gaflight_client_do_put:
+ * @client: A #GAFlightClient.
+ * @descriptor: A #GAFlightDescriptor.
+ * @schema: A #GArrowSchema.
+ * @options: (nullable): A #GAFlightCallOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Upload data to a Flight described by the given descriptor. The
+ * caller must call garrow_record_batch_writer_close() on the
+ * returned stream once they are done writing.
+ *
+ * The reader and writer are linked; closing the writer will also
+ * close the reader. Use gaflight_stream_writer_done_writing() to
+ * only close the write side of the channel.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The #GAFlightDoPutResult holding a reader and a writer on success,
+ *   %NULL on error.
+ *
+ * Since: 18.0.0
+ */
+GAFlightDoPutResult *
+gaflight_client_do_put(GAFlightClient *client,
+                       GAFlightDescriptor *descriptor,
+                       GArrowSchema *schema,
+                       GAFlightCallOptions *options,
+                       GError **error)
+{
+  auto flight_client = gaflight_client_get_raw(client);
+  auto flight_descriptor = gaflight_descriptor_get_raw(descriptor);
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  arrow::flight::FlightCallOptions flight_default_options;
+  auto flight_options = &flight_default_options;
+  if (options) {
+    flight_options = gaflight_call_options_get_raw(options);
+  }
+  auto result = flight_client->DoPut(*flight_options, *flight_descriptor, arrow_schema);
+  if (!garrow::check(error, result, "[flight-client][do-put]")) {
+    return nullptr;
+  }
+  auto flight_result = std::move(*result);
+  return gaflight_do_put_result_new_raw(&flight_result);
+}
+
 G_END_DECLS
 
 GAFlightStreamReader *
@@ -672,7 +979,28 @@ gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader,
                                              flight_reader,
                                              "is-owner",
                                              is_owner,
-                                             NULL));
+                                             nullptr));
+}
+
+GAFlightStreamWriter *
+gaflight_stream_writer_new_raw(arrow::flight::FlightStreamWriter *flight_writer)
+{
+  return GAFLIGHT_STREAM_WRITER(
+    g_object_new(GAFLIGHT_TYPE_STREAM_WRITER, "writer", flight_writer, nullptr));
+}
+
+GAFlightMetadataReader *
+gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader)
+{
+  return GAFLIGHT_METADATA_READER(
+    g_object_new(GAFLIGHT_TYPE_METADATA_READER, "reader", flight_reader, nullptr));
+}
+
+arrow::flight::FlightMetadataReader *
+gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader)
+{
+  auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(reader);
+  return priv->reader;
 }
 
 arrow::flight::FlightCallOptions *
@@ -689,6 +1017,13 @@ gaflight_client_options_get_raw(GAFlightClientOptions *options)
   return &(priv->options);
 }
 
+GAFlightDoPutResult *
+gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result)
+{
+  return GAFLIGHT_DO_PUT_RESULT(
+    g_object_new(GAFLIGHT_TYPE_DO_PUT_RESULT, "result", flight_result, nullptr));
+}
+
 std::shared_ptr<arrow::flight::FlightClient>
 gaflight_client_get_raw(GAFlightClient *client)
 {
diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h
index a91bbe55e3c04..12c5a06b810e1 100644
--- a/c_glib/arrow-flight-glib/client.h
+++ b/c_glib/arrow-flight-glib/client.h
@@ -35,6 +35,35 @@ struct _GAFlightStreamReaderClass
   GAFlightRecordBatchReaderClass parent_class;
 };
 
+#define GAFLIGHT_TYPE_STREAM_WRITER (gaflight_stream_writer_get_type())
+GAFLIGHT_AVAILABLE_IN_18_0
+G_DECLARE_DERIVABLE_TYPE(GAFlightStreamWriter,
+                         gaflight_stream_writer,
+                         GAFLIGHT,
+                         STREAM_WRITER,
+                         GAFlightRecordBatchWriter)
+struct _GAFlightStreamWriterClass
+{
+  GAFlightRecordBatchWriterClass parent_class;
+};
+
+GAFLIGHT_AVAILABLE_IN_18_0
+gboolean
+gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error);
+
+#define GAFLIGHT_TYPE_METADATA_READER (gaflight_metadata_reader_get_type())
+GAFLIGHT_AVAILABLE_IN_18_0
+G_DECLARE_DERIVABLE_TYPE(
+  GAFlightMetadataReader, gaflight_metadata_reader, GAFLIGHT, METADATA_READER, GObject)
+struct _GAFlightMetadataReaderClass
+{
+  GObjectClass parent_class;
+};
+
+GAFLIGHT_AVAILABLE_IN_18_0
+GArrowBuffer *
+gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error);
+
 #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type())
 GAFLIGHT_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
@@ -75,6 +104,15 @@ GAFLIGHT_AVAILABLE_IN_5_0
 GAFlightClientOptions *
 gaflight_client_options_new(void);
 
+#define GAFLIGHT_TYPE_DO_PUT_RESULT (gaflight_do_put_result_get_type())
+GAFLIGHT_AVAILABLE_IN_18_0
+G_DECLARE_DERIVABLE_TYPE(
+  GAFlightDoPutResult, gaflight_do_put_result, GAFLIGHT, DO_PUT_RESULT, GObject)
+struct _GAFlightDoPutResultClass
+{
+  GObjectClass parent_class;
+};
+
 #define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type())
 GAFLIGHT_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GAFlightClient, gaflight_client, GAFLIGHT, CLIENT, GObject)
@@ -124,4 +162,12 @@ gaflight_client_do_get(GAFlightClient *client,
                        GAFlightCallOptions *options,
                        GError **error);
 
+GAFLIGHT_AVAILABLE_IN_18_0
+GAFlightDoPutResult *
+gaflight_client_do_put(GAFlightClient *client,
+                       GAFlightDescriptor *descriptor,
+                       GArrowSchema *schema,
+                       GAFlightCallOptions *options,
+                       GError **error);
+
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp
index 185a28e6dc4bd..888f87ecb5732 100644
--- a/c_glib/arrow-flight-glib/client.hpp
+++ b/c_glib/arrow-flight-glib/client.hpp
@@ -28,6 +28,18 @@ GAFlightStreamReader *
 gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader,
                                gboolean is_owner);
 
+GAFLIGHT_EXTERN
+GAFlightStreamWriter *
+gaflight_stream_writer_new_raw(arrow::flight::FlightStreamWriter *flight_writer);
+
+GAFLIGHT_EXTERN
+GAFlightMetadataReader *
+gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader);
+
+GAFLIGHT_EXTERN
+arrow::flight::FlightMetadataReader *
+gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader);
+
 GAFLIGHT_EXTERN
 arrow::flight::FlightCallOptions *
 gaflight_call_options_get_raw(GAFlightCallOptions *options);
@@ -36,6 +48,10 @@ GAFLIGHT_EXTERN
 arrow::flight::FlightClientOptions *
 gaflight_client_options_get_raw(GAFlightClientOptions *options);
 
+GAFLIGHT_EXTERN
+GAFlightDoPutResult *
+gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result);
+
 GAFLIGHT_EXTERN
 std::shared_ptr<arrow::flight::FlightClient>
 gaflight_client_get_raw(GAFlightClient *client);
diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp
index f7eea08c264b3..3deaf67cc14e8 100644
--- a/c_glib/arrow-flight-glib/common.cpp
+++ b/c_glib/arrow-flight-glib/common.cpp
@@ -1196,7 +1196,7 @@ gaflight_record_batch_reader_finalize(GObject *object)
   if (priv->is_owner) {
     delete priv->reader;
   }
-  G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object);
+  G_OBJECT_CLASS(gaflight_record_batch_reader_parent_class)->finalize(object);
 }
 
 static void
@@ -1300,57 +1300,9 @@ gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError
   }
 }
 
-typedef struct GAFlightRecordBatchWriterPrivate_
-{
-  arrow::flight::MetadataRecordBatchWriter *writer;
-  bool is_owner;
-} GAFlightRecordBatchWriterPrivate;
-
-enum {
-  PROP_RECORD_BATCH_WRITER_WRITER = 1,
-  PROP_RECORD_BATCH_WRITER_IS_OWNER,
-};
-
-G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightRecordBatchWriter,
-                                    gaflight_record_batch_writer,
-                                    GARROW_TYPE_RECORD_BATCH_WRITER)
-
-#define GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object)                                 \
-  static_cast<GAFlightRecordBatchWriterPrivate *>(                                       \
-    gaflight_record_batch_writer_get_instance_private(                                   \
-      GAFLIGHT_RECORD_BATCH_WRITER(object)))
-
-static void
-gaflight_record_batch_writer_finalize(GObject *object)
-{
-  auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object);
-  if (priv->is_owner) {
-    delete priv->writer;
-  }
-  G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object);
-}
-
-static void
-gaflight_record_batch_writer_set_property(GObject *object,
-                                          guint prop_id,
-                                          const GValue *value,
-                                          GParamSpec *pspec)
-{
-  auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(object);
-
-  switch (prop_id) {
-  case PROP_RECORD_BATCH_WRITER_WRITER:
-    priv->writer =
-      static_cast<arrow::flight::MetadataRecordBatchWriter *>(g_value_get_pointer(value));
-    break;
-  case PROP_RECORD_BATCH_WRITER_IS_OWNER:
-    priv->is_owner = g_value_get_boolean(value);
-    break;
-  default:
-    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
-    break;
-  }
-}
+G_DEFINE_ABSTRACT_TYPE(GAFlightRecordBatchWriter,
+                       gaflight_record_batch_writer,
+                       GARROW_TYPE_RECORD_BATCH_WRITER)
 
 static void
 gaflight_record_batch_writer_init(GAFlightRecordBatchWriter *object)
@@ -1360,26 +1312,6 @@ gaflight_record_batch_writer_init(GAFlightRecordBatchWriter *object)
 static void
 gaflight_record_batch_writer_class_init(GAFlightRecordBatchWriterClass *klass)
 {
-  auto gobject_class = G_OBJECT_CLASS(klass);
-
-  gobject_class->finalize = gaflight_record_batch_writer_finalize;
-  gobject_class->set_property = gaflight_record_batch_writer_set_property;
-
-  GParamSpec *spec;
-  spec = g_param_spec_pointer(
-    "writer",
-    nullptr,
-    nullptr,
-    static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_WRITER_WRITER, spec);
-
-  spec = g_param_spec_boolean(
-    "is-owner",
-    nullptr,
-    nullptr,
-    TRUE,
-    static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_WRITER_IS_OWNER, spec);
 }
 
 /**
@@ -1402,7 +1334,8 @@ gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer,
                                    GArrowWriteOptions *options,
                                    GError **error)
 {
-  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>(
+    garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer)));
   auto arrow_schema = garrow_schema_get_raw(schema);
   arrow::ipc::IpcWriteOptions arrow_write_options;
   if (options) {
@@ -1432,7 +1365,8 @@ gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
                                             GArrowBuffer *metadata,
                                             GError **error)
 {
-  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>(
+    garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer)));
   auto arrow_metadata = garrow_buffer_get_raw(metadata);
   return garrow::check(error,
                        flight_writer->WriteMetadata(arrow_metadata),
@@ -1440,7 +1374,7 @@ gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
 }
 
 /**
- * gaflight_record_batch_writer_write:
+ * gaflight_record_batch_writer_write_record_batch:
  * @writer: A #GAFlightRecordBatchWriter.
  * @record_batch: A #GArrowRecordBatch.
  * @metadata: (nullable): A #GArrowBuffer.
@@ -1453,12 +1387,13 @@ gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
  * Since: 18.0.0
  */
 gboolean
-gaflight_record_batch_writer_write(GAFlightRecordBatchWriter *writer,
-                                   GArrowRecordBatch *record_batch,
-                                   GArrowBuffer *metadata,
-                                   GError **error)
+gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer,
+                                                GArrowRecordBatch *record_batch,
+                                                GArrowBuffer *metadata,
+                                                GError **error)
 {
-  auto flight_writer = gaflight_record_batch_writer_get_raw(writer);
+  auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>(
+    garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer)));
   auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
   auto arrow_metadata = garrow_buffer_get_raw(metadata);
   return garrow::check(
@@ -1599,10 +1534,3 @@ gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader)
   auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(reader);
   return priv->reader;
 }
-
-arrow::flight::MetadataRecordBatchWriter *
-gaflight_record_batch_writer_get_raw(GAFlightRecordBatchWriter *writer)
-{
-  auto priv = GAFLIGHT_RECORD_BATCH_WRITER_GET_PRIVATE(writer);
-  return priv->writer;
-}
diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h
index 91c828caabb36..726132fe4921b 100644
--- a/c_glib/arrow-flight-glib/common.h
+++ b/c_glib/arrow-flight-glib/common.h
@@ -259,9 +259,9 @@ gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer,
 
 GAFLIGHT_AVAILABLE_IN_18_0
 gboolean
-gaflight_record_batch_writer_write(GAFlightRecordBatchWriter *writer,
-                                   GArrowRecordBatch *record_batch,
-                                   GArrowBuffer *metadata,
-                                   GError **error);
+gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer,
+                                                GArrowRecordBatch *record_batch,
+                                                GArrowBuffer *metadata,
+                                                GError **error);
 
 G_END_DECLS
diff --git a/c_glib/arrow-glib/writer.hpp b/c_glib/arrow-glib/writer.hpp
index aa87ffe77d79b..1d85ac52f88d1 100644
--- a/c_glib/arrow-glib/writer.hpp
+++ b/c_glib/arrow-glib/writer.hpp
@@ -25,16 +25,20 @@
 
 #include <arrow-glib/writer.h>
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatchWriter *
 garrow_record_batch_writer_new_raw(
   std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer);
+GARROW_AVAILABLE_IN_ALL
 std::shared_ptr<arrow::ipc::RecordBatchWriter>
 garrow_record_batch_writer_get_raw(GArrowRecordBatchWriter *writer);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatchStreamWriter *
 garrow_record_batch_stream_writer_new_raw(
   std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatchFileWriter *
 garrow_record_batch_file_writer_new_raw(
   std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer);

From b83666234c05d34c23993708160033c259b9ec26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= <raulcumplido@gmail.com>
Date: Tue, 27 Aug 2024 10:30:23 +0200
Subject: [PATCH 24/63] GH-43815: [CI][Packaging][Python] Avoid uploading wheel
 to gemfury if version already exists (#43816)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes are included in this PR?

Check whether version exists on gemfury before trying upload

### Are these changes tested?

Will be tested via archery

### Are there any user-facing changes?

No
* GitHub Issue: #43815

Lead-authored-by: Raúl Cumplido <raulcumplido@gmail.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com>
---
 dev/tasks/macros.jinja | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index 6423ca0e9efda..df55f32222e91 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -169,10 +169,14 @@ env:
   - name: Upload package to Gemfury
     shell: bash
     run: |
-      fury push \
-        --api-token=${CROSSBOW_GEMFURY_TOKEN} \
-        --as=${CROSSBOW_GEMFURY_ORG} \
-        {{ pattern }}
+      if $(fury versions --as=${CROSSBOW_GEMFURY_ORG} --api-token=${CROSSBOW_GEMFURY_TOKEN} pyarrow | grep --fixed-strings -q "{{ arrow.no_rc_version }}"); then
+        echo "Version {{ arrow.no_rc_version }} already exists. Avoid pushing version."
+      else
+        fury push \
+          --api-token=${CROSSBOW_GEMFURY_TOKEN} \
+          --as=${CROSSBOW_GEMFURY_ORG} \
+          {{ pattern }}
+      fi
     env:
       CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
       CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}

From 6502f0e3ad046d361aba44385ab3379ed7af5b7f Mon Sep 17 00:00:00 2001
From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com>
Date: Tue, 27 Aug 2024 13:17:39 -0400
Subject: [PATCH 25/63] GH-43790: [Go][Parquet] Add support for LZ4_RAW
 compression codec (#43835)

### Rationale for this change

Fixes: #43790

The LZ4 compression codec for Parquet is no longer ambiguous, as it has been superseded by the [LZ4_RAW](https://github.com/apache/parquet-format/blob/master/Compression.md#lz4_raw) spec.

### What changes are included in this PR?

- Add `LZ4Raw` compression codec
- Split out `StreamingCodec` methods from core `Codec` interface
- Various conformance/roundtrip tests
- Set of benchmarks for reading/writing an Arrow table to/from Parquet, using each compression codec

### Are these changes tested?

Yes

### Are there any user-facing changes?

- New codec `LZ4Raw` is available
- `Codec` interface no longer provides the following methods, which are now part of `StreamingCodec`:
  - `NewReader`
  - `NewWriter`
  - `NewWriterLevel`

* GitHub Issue: #43790

Authored-by: Joel Lubinitsky <joellubi@gmail.com>
Signed-off-by: Joel Lubinitsky <joellubi@gmail.com>
---
 go/parquet/compress/compress.go          |  22 ++--
 go/parquet/compress/compress_test.go     |   8 +-
 go/parquet/compress/lz4_raw.go           |  66 ++++++++++++
 go/parquet/file/file_reader_test.go      | 127 +++++++++++++++++++++++
 go/parquet/file/file_writer_test.go      |  58 ++++++++++-
 go/parquet/pqarrow/reader_writer_test.go | 111 ++++++++++++++++++++
 6 files changed, 380 insertions(+), 12 deletions(-)
 create mode 100644 go/parquet/compress/lz4_raw.go

diff --git a/go/parquet/compress/compress.go b/go/parquet/compress/compress.go
index b6a1349133e84..92f2ae99bb13f 100644
--- a/go/parquet/compress/compress.go
+++ b/go/parquet/compress/compress.go
@@ -49,8 +49,9 @@ var Codecs = struct {
 	Brotli Compression
 	// LZ4 unsupported in this library due to problematic issues between the Hadoop LZ4 spec vs regular lz4
 	// see: http://mail-archives.apache.org/mod_mbox/arrow-dev/202007.mbox/%3CCAAri41v24xuA8MGHLDvgSnE+7AAgOhiEukemW_oPNHMvfMmrWw@mail.gmail.com%3E
-	Lz4  Compression
-	Zstd Compression
+	Lz4    Compression
+	Zstd   Compression
+	Lz4Raw Compression
 }{
 	Uncompressed: Compression(parquet.CompressionCodec_UNCOMPRESSED),
 	Snappy:       Compression(parquet.CompressionCodec_SNAPPY),
@@ -59,17 +60,12 @@ var Codecs = struct {
 	Brotli:       Compression(parquet.CompressionCodec_BROTLI),
 	Lz4:          Compression(parquet.CompressionCodec_LZ4),
 	Zstd:         Compression(parquet.CompressionCodec_ZSTD),
+	Lz4Raw:       Compression(parquet.CompressionCodec_LZ4_RAW),
 }
 
 // Codec is an interface which is implemented for each compression type in order to make the interactions easy to
 // implement. Most consumers won't be calling GetCodec directly.
 type Codec interface {
-	// NewReader provides a reader that wraps a stream with compressed data to stream the uncompressed data
-	NewReader(io.Reader) io.ReadCloser
-	// NewWriter provides a wrapper around a write stream to compress data before writing it.
-	NewWriter(io.Writer) io.WriteCloser
-	// NewWriterLevel is like NewWriter but allows specifying the compression level
-	NewWriterLevel(io.Writer, int) (io.WriteCloser, error)
 	// Encode encodes a block of data given by src and returns the compressed block. dst should be either nil
 	// or sized large enough to fit the compressed block (use CompressBound to allocate). dst and src should not
 	// overlap since some of the compression types don't allow it.
@@ -90,6 +86,16 @@ type Codec interface {
 	Decode(dst, src []byte) []byte
 }
 
+// StreamingCodec is an interface that may be implemented for compression codecs that expose a streaming API.
+type StreamingCodec interface {
+	// NewReader provides a reader that wraps a stream with compressed data to stream the uncompressed data
+	NewReader(io.Reader) io.ReadCloser
+	// NewWriter provides a wrapper around a write stream to compress data before writing it.
+	NewWriter(io.Writer) io.WriteCloser
+	// NewWriterLevel is like NewWriter but allows specifying the compression level
+	NewWriterLevel(io.Writer, int) (io.WriteCloser, error)
+}
+
 var codecs = map[Compression]Codec{}
 
 // RegisterCodec adds or overrides a codec implementation for a given compression algorithm.
diff --git a/go/parquet/compress/compress_test.go b/go/parquet/compress/compress_test.go
index 843062c0d024a..5aac74759e1f9 100644
--- a/go/parquet/compress/compress_test.go
+++ b/go/parquet/compress/compress_test.go
@@ -66,8 +66,8 @@ func TestCompressDataOneShot(t *testing.T) {
 		{compress.Codecs.Gzip},
 		{compress.Codecs.Brotli},
 		{compress.Codecs.Zstd},
+		{compress.Codecs.Lz4Raw},
 		// {compress.Codecs.Lzo},
-		// {compress.Codecs.Lz4},
 	}
 
 	for _, tt := range tests {
@@ -107,9 +107,11 @@ func TestCompressReaderWriter(t *testing.T) {
 			var buf bytes.Buffer
 			codec, err := compress.GetCodec(tt.c)
 			assert.NoError(t, err)
+			streamingCodec, ok := codec.(compress.StreamingCodec)
+			assert.True(t, ok)
 			data := makeRandomData(RandomDataSize)
 
-			wr := codec.NewWriter(&buf)
+			wr := streamingCodec.NewWriter(&buf)
 
 			const chunkSize = 1111
 			input := data
@@ -129,7 +131,7 @@ func TestCompressReaderWriter(t *testing.T) {
 			}
 			wr.Close()
 
-			rdr := codec.NewReader(&buf)
+			rdr := streamingCodec.NewReader(&buf)
 			out, err := io.ReadAll(rdr)
 			assert.NoError(t, err)
 			assert.Exactly(t, data, out)
diff --git a/go/parquet/compress/lz4_raw.go b/go/parquet/compress/lz4_raw.go
new file mode 100644
index 0000000000000..788d9520a668b
--- /dev/null
+++ b/go/parquet/compress/lz4_raw.go
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compress
+
+import (
+	"sync"
+
+	"github.com/pierrec/lz4/v4"
+)
+
+// lz4.Compressor is not goroutine-safe, so we use a pool to amortize the cost
+// of allocating a new one for each call to Encode().
+var compressorPool = sync.Pool{New: func() interface{} { return new(lz4.Compressor) }}
+
+func compressBlock(src, dst []byte) (int, error) {
+	c := compressorPool.Get().(*lz4.Compressor)
+	defer compressorPool.Put(c)
+	return c.CompressBlock(src, dst)
+}
+
+type lz4RawCodec struct{}
+
+func (c lz4RawCodec) Encode(dst, src []byte) []byte {
+	n, err := compressBlock(src, dst[:cap(dst)])
+	if err != nil {
+		panic(err)
+	}
+
+	return dst[:n]
+}
+
+func (c lz4RawCodec) EncodeLevel(dst, src []byte, _ int) []byte {
+	// the lz4 block implementation does not allow level to be set
+	return c.Encode(dst, src)
+}
+
+func (lz4RawCodec) Decode(dst, src []byte) []byte {
+	n, err := lz4.UncompressBlock(src, dst)
+	if err != nil {
+		panic(err)
+	}
+
+	return dst[:n]
+}
+
+func (c lz4RawCodec) CompressBound(len int64) int64 {
+	return int64(lz4.CompressBlockBound(int(len)))
+}
+
+func init() {
+	RegisterCodec(Codecs.Lz4Raw, lz4RawCodec{})
+}
diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go
index 547ec475c2720..35f4da4e8667c 100644
--- a/go/parquet/file/file_reader_test.go
+++ b/go/parquet/file/file_reader_test.go
@@ -644,3 +644,130 @@ func TestDeltaBinaryPackedMultipleBatches(t *testing.T) {
 
 	require.Equalf(t, size, totalRows, "Expected %d rows, but got %d rows", size, totalRows)
 }
+
+// Test read file lz4_raw_compressed.parquet
+// Contents documented at https://github.com/apache/parquet-testing/commit/ddd898958803cb89b7156c6350584d1cda0fe8de
+func TestLZ4RawFileRead(t *testing.T) {
+	dir := os.Getenv("PARQUET_TEST_DATA")
+	if dir == "" {
+		t.Skip("no path supplied with PARQUET_TEST_DATA")
+	}
+	require.DirExists(t, dir)
+
+	props := parquet.NewReaderProperties(memory.DefaultAllocator)
+	fileReader, err := file.OpenParquetFile(path.Join(dir, "lz4_raw_compressed.parquet"),
+		false, file.WithReadProps(props))
+	require.NoError(t, err)
+	defer fileReader.Close()
+
+	nRows := 4
+	nCols := 3
+	require.Equal(t, 1, fileReader.NumRowGroups())
+	rgr := fileReader.RowGroup(0)
+	require.EqualValues(t, nRows, rgr.NumRows())
+	require.EqualValues(t, nCols, rgr.NumColumns())
+
+	rdr, err := rgr.Column(0)
+	require.NoError(t, err)
+
+	rowsInt64, ok := rdr.(*file.Int64ColumnChunkReader)
+	require.True(t, ok)
+
+	valsInt64 := make([]int64, nRows)
+	total, read, err := rowsInt64.ReadBatch(int64(nRows), valsInt64, nil, nil)
+	require.NoError(t, err)
+	require.Equal(t, int64(nRows), total)
+	require.Equal(t, nRows, read)
+
+	expectedValsInt64 := []int64{
+		1593604800,
+		1593604800,
+		1593604801,
+		1593604801,
+	}
+	require.Equal(t, expectedValsInt64, valsInt64)
+
+	rdr, err = rgr.Column(1)
+	require.NoError(t, err)
+
+	rowsByteArray, ok := rdr.(*file.ByteArrayColumnChunkReader)
+	require.True(t, ok)
+
+	valsByteArray := make([]parquet.ByteArray, nRows)
+	total, read, err = rowsByteArray.ReadBatch(int64(nRows), valsByteArray, nil, nil)
+	require.NoError(t, err)
+	require.Equal(t, int64(nRows), total)
+	require.Equal(t, nRows, read)
+
+	expectedValsByteArray := []parquet.ByteArray{
+		[]byte("abc"),
+		[]byte("def"),
+		[]byte("abc"),
+		[]byte("def"),
+	}
+	require.Equal(t, expectedValsByteArray, valsByteArray)
+
+	rdr, err = rgr.Column(2)
+	require.NoError(t, err)
+
+	rowsFloat64, ok := rdr.(*file.Float64ColumnChunkReader)
+	require.True(t, ok)
+
+	valsFloat64 := make([]float64, nRows)
+	total, read, err = rowsFloat64.ReadBatch(int64(nRows), valsFloat64, nil, nil)
+	require.NoError(t, err)
+	require.Equal(t, int64(nRows), total)
+	require.Equal(t, nRows, read)
+
+	expectedValsFloat64 := []float64{
+		42.0,
+		7.7,
+		42.125,
+		7.7,
+	}
+	require.Equal(t, expectedValsFloat64, valsFloat64)
+}
+
+// Test read file lz4_raw_compressed_larger.parquet
+// Contents documented at https://github.com/apache/parquet-testing/commit/ddd898958803cb89b7156c6350584d1cda0fe8de
+func TestLZ4RawLargerFileRead(t *testing.T) {
+	dir := os.Getenv("PARQUET_TEST_DATA")
+	if dir == "" {
+		t.Skip("no path supplied with PARQUET_TEST_DATA")
+	}
+	require.DirExists(t, dir)
+
+	props := parquet.NewReaderProperties(memory.DefaultAllocator)
+	fileReader, err := file.OpenParquetFile(path.Join(dir, "lz4_raw_compressed_larger.parquet"),
+		false, file.WithReadProps(props))
+	require.NoError(t, err)
+	defer fileReader.Close()
+
+	nRows := 10000
+	nCols := 1
+	require.Equal(t, 1, fileReader.NumRowGroups())
+	rgr := fileReader.RowGroup(0)
+	require.EqualValues(t, nRows, rgr.NumRows())
+	require.EqualValues(t, nCols, rgr.NumColumns())
+
+	rdr, err := rgr.Column(0)
+	require.NoError(t, err)
+
+	rows, ok := rdr.(*file.ByteArrayColumnChunkReader)
+	require.True(t, ok)
+
+	vals := make([]parquet.ByteArray, nRows)
+	total, read, err := rows.ReadBatch(int64(nRows), vals, nil, nil)
+	require.NoError(t, err)
+	require.Equal(t, int64(nRows), total)
+	require.Equal(t, nRows, read)
+
+	expectedValsHead := []parquet.ByteArray{
+		[]byte("c7ce6bef-d5b0-4863-b199-8ea8c7fb117b"),
+		[]byte("e8fb9197-cb9f-4118-b67f-fbfa65f61843"),
+		[]byte("885136e1-0aa1-4fdb-8847-63d87b07c205"),
+		[]byte("ce7b2019-8ebe-4906-a74d-0afa2409e5df"),
+		[]byte("a9ee2527-821b-4b71-a926-03f73c3fc8b7"),
+	}
+	require.Equal(t, expectedValsHead, vals[:len(expectedValsHead)])
+}
diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go
index 0faf3f7233bd3..12ac93d1ef4b2 100644
--- a/go/parquet/file/file_writer_test.go
+++ b/go/parquet/file/file_writer_test.go
@@ -260,7 +260,7 @@ func (t *SerializeTestSuite) TestSmallFile() {
 		compress.Codecs.Brotli,
 		compress.Codecs.Gzip,
 		compress.Codecs.Zstd,
-		// compress.Codecs.Lz4,
+		compress.Codecs.Lz4Raw,
 		// compress.Codecs.Lzo,
 	}
 	for _, c := range codecs {
@@ -540,3 +540,59 @@ func TestBatchedByteStreamSplitFileRoundtrip(t *testing.T) {
 
 	require.NoError(t, rdr.Close())
 }
+
+func TestLZ4RawFileRoundtrip(t *testing.T) {
+	input := []int64{
+		-1, 0, 1, 2, 3, 4, 5, 123456789, -123456789,
+	}
+
+	size := len(input)
+
+	field, err := schema.NewPrimitiveNodeLogical("int64", parquet.Repetitions.Required, nil, parquet.Types.Int64, 0, 1)
+	require.NoError(t, err)
+
+	schema, err := schema.NewGroupNode("test", parquet.Repetitions.Required, schema.FieldList{field}, 0)
+	require.NoError(t, err)
+
+	sink := encoding.NewBufferWriter(0, memory.DefaultAllocator)
+	writer := file.NewParquetWriter(sink, schema, file.WithWriterProps(parquet.NewWriterProperties(parquet.WithCompression(compress.Codecs.Lz4Raw))))
+
+	rgw := writer.AppendRowGroup()
+	cw, err := rgw.NextColumn()
+	require.NoError(t, err)
+
+	i64ColumnWriter, ok := cw.(*file.Int64ColumnChunkWriter)
+	require.True(t, ok)
+
+	nVals, err := i64ColumnWriter.WriteBatch(input, nil, nil)
+	require.NoError(t, err)
+	require.EqualValues(t, size, nVals)
+
+	require.NoError(t, cw.Close())
+	require.NoError(t, rgw.Close())
+	require.NoError(t, writer.Close())
+
+	rdr, err := file.NewParquetReader(bytes.NewReader(sink.Bytes()))
+	require.NoError(t, err)
+
+	require.Equal(t, 1, rdr.NumRowGroups())
+	require.EqualValues(t, size, rdr.NumRows())
+
+	rgr := rdr.RowGroup(0)
+	cr, err := rgr.Column(0)
+	require.NoError(t, err)
+
+	i64ColumnReader, ok := cr.(*file.Int64ColumnChunkReader)
+	require.True(t, ok)
+
+	output := make([]int64, size)
+
+	total, valuesRead, err := i64ColumnReader.ReadBatch(int64(size), output, nil, nil)
+	require.NoError(t, err)
+	require.EqualValues(t, size, total)
+	require.EqualValues(t, size, valuesRead)
+
+	require.Equal(t, input, output)
+
+	require.NoError(t, rdr.Close())
+}
diff --git a/go/parquet/pqarrow/reader_writer_test.go b/go/parquet/pqarrow/reader_writer_test.go
index 31bd0eba84388..e020c7d9457a9 100644
--- a/go/parquet/pqarrow/reader_writer_test.go
+++ b/go/parquet/pqarrow/reader_writer_test.go
@@ -19,6 +19,8 @@ package pqarrow_test
 import (
 	"bytes"
 	"context"
+	"fmt"
+	"math"
 	"testing"
 	"unsafe"
 
@@ -26,8 +28,10 @@ import (
 	"github.com/apache/arrow/go/v18/arrow/array"
 	"github.com/apache/arrow/go/v18/arrow/memory"
 	"github.com/apache/arrow/go/v18/parquet"
+	"github.com/apache/arrow/go/v18/parquet/compress"
 	"github.com/apache/arrow/go/v18/parquet/file"
 	"github.com/apache/arrow/go/v18/parquet/pqarrow"
+	"github.com/stretchr/testify/require"
 	"golang.org/x/exp/rand"
 	"gonum.org/v1/gonum/stat/distuv"
 )
@@ -275,3 +279,110 @@ func BenchmarkReadColumnFloat64(b *testing.B) {
 		benchReadTable(b, tt.name, tbl, int64(arrow.Int32Traits.BytesRequired(SIZELEN)))
 	}
 }
+
+var compressTestCases = []struct {
+	c compress.Compression
+}{
+	{compress.Codecs.Uncompressed},
+	{compress.Codecs.Snappy},
+	{compress.Codecs.Gzip},
+	{compress.Codecs.Brotli},
+	{compress.Codecs.Zstd},
+	{compress.Codecs.Lz4Raw},
+	// {compress.Codecs.Lzo},
+}
+
+func buildTableForTest(mem memory.Allocator) arrow.Table {
+	schema := arrow.NewSchema(
+		[]arrow.Field{
+			{Name: "int64s", Type: arrow.PrimitiveTypes.Int64},
+			{Name: "strings", Type: arrow.BinaryTypes.String},
+			{Name: "bools", Type: arrow.FixedWidthTypes.Boolean},
+			{Name: "repeated_int64s", Type: arrow.PrimitiveTypes.Int64},
+			{Name: "repeated_strings", Type: arrow.BinaryTypes.String},
+			{Name: "repeated_bools", Type: arrow.FixedWidthTypes.Boolean},
+		},
+		nil,
+	)
+	bldr := array.NewRecordBuilder(mem, schema)
+	defer bldr.Release()
+
+	for i := 0; i < SIZELEN; i++ {
+		bldr.Field(0).(*array.Int64Builder).Append(int64(i))
+		bldr.Field(1).(*array.StringBuilder).Append(fmt.Sprint(i))
+		bldr.Field(2).(*array.BooleanBuilder).Append(i%2 == 0)
+		bldr.Field(3).(*array.Int64Builder).Append(0)
+		bldr.Field(4).(*array.StringBuilder).Append("the string is the same")
+		bldr.Field(5).(*array.BooleanBuilder).Append(true)
+	}
+
+	rec := bldr.NewRecord()
+	return array.NewTableFromRecords(schema, []arrow.Record{rec})
+}
+
+func BenchmarkWriteTableCompressed(b *testing.B) {
+	mem := memory.DefaultAllocator
+	table := buildTableForTest(mem)
+	defer table.Release()
+
+	var uncompressedSize uint64
+	for idxCol := 0; int64(idxCol) < table.NumCols(); idxCol++ {
+		column := table.Column(idxCol)
+		for _, chunk := range column.Data().Chunks() {
+			uncompressedSize += chunk.Data().SizeInBytes()
+		}
+	}
+
+	var buf bytes.Buffer
+	buf.Grow(int(uncompressedSize))
+	for _, tc := range compressTestCases {
+		b.Run(fmt.Sprintf("codec=%s", tc.c), func(b *testing.B) {
+			buf.Reset()
+			b.ResetTimer()
+			b.SetBytes(int64(uncompressedSize))
+			for n := 0; n < b.N; n++ {
+				require.NoError(b,
+					pqarrow.WriteTable(
+						table,
+						&buf,
+						math.MaxInt64,
+						parquet.NewWriterProperties(parquet.WithAllocator(mem), parquet.WithCompression(tc.c)),
+						pqarrow.DefaultWriterProps(),
+					),
+				)
+			}
+		})
+	}
+}
+
+func BenchmarkReadTableCompressed(b *testing.B) {
+	ctx := context.Background()
+	mem := memory.DefaultAllocator
+	table := buildTableForTest(mem)
+	defer table.Release()
+
+	for _, tc := range compressTestCases {
+		b.Run(fmt.Sprintf("codec=%s", tc.c), func(b *testing.B) {
+			var buf bytes.Buffer
+			err := pqarrow.WriteTable(
+				table,
+				&buf,
+				math.MaxInt64,
+				parquet.NewWriterProperties(parquet.WithAllocator(mem), parquet.WithCompression(tc.c)),
+				pqarrow.DefaultWriterProps(),
+			)
+			require.NoError(b, err)
+
+			compressedBytes := buf.Len()
+			rdr := bytes.NewReader(buf.Bytes())
+
+			b.ResetTimer()
+			b.SetBytes(int64(compressedBytes))
+			for n := 0; n < b.N; n++ {
+				tab, err := pqarrow.ReadTable(ctx, rdr, nil, pqarrow.ArrowReadProperties{}, mem)
+				require.NoError(b, err)
+				defer tab.Release()
+			}
+		})
+	}
+}

From ce1e724d7ea292746ede6a538519658f1ecab849 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 27 Aug 2024 19:17:55 +0200
Subject: [PATCH 26/63] MINOR: [CI] Use `docker compose` on self-hosted ARM
 builds (#43844)

### Rationale for this change

The Docker client version on the ARM64 self-hosted runners is now recent enough, so we don't need to use `docker-compose` there anymore.

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .github/workflows/cpp.yml                 | 5 +----
 .github/workflows/go.yml                  | 5 -----
 dev/tasks/java-jars/github.yml            | 2 --
 dev/tasks/linux-packages/github.linux.yml | 1 -
 dev/tasks/python-wheels/github.linux.yml  | 1 -
 5 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index a82e1eb76660b..c5482f730823b 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -99,7 +99,6 @@ jobs:
             cat <<JSON >> "$GITHUB_OUTPUT"
           {
             "arch": "arm64v8",
-            "archery-use-legacy-docker-compose": "1",
             "clang-tools": "10",
             "image": "ubuntu-cpp",
             "llvm": "10",
@@ -124,9 +123,6 @@ jobs:
         include: ${{ fromJson(needs.docker-targets.outputs.targets) }}
     env:
       ARCH: ${{ matrix.arch }}
-      # By default, use `docker compose` because docker-compose v1 is obsolete,
-      # except where the Docker client version is too old.
-      ARCHERY_USE_LEGACY_DOCKER_COMPOSE: ${{ matrix.archery-use-legacy-docker-compose || '0' }}
       ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
       CLANG_TOOLS: ${{ matrix.clang-tools }}
       LLVM: ${{ matrix.llvm }}
@@ -147,6 +143,7 @@ jobs:
         run: |
           sudo apt update
           sudo apt install -y --no-install-recommends python3 python3-dev python3-pip
+          python3 -m pip install -U pip
       - name: Setup Archery
         run: python3 -m pip install -e dev/archery[docker]
       - name: Execute Docker Build
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 20c78d86cb2a3..ffd543691d5b2 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -78,14 +78,12 @@ jobs:
           {
             "arch-label": "ARM64",
             "arch": "arm64v8",
-            "archery-use-legacy-docker-compose": "1",
             "go": "1.21",
             "runs-on": ["self-hosted", "arm", "linux"]
           },
           {
             "arch-label": "ARM64",
             "arch": "arm64v8",
-            "archery-use-legacy-docker-compose": "1",
             "go": "1.22",
             "runs-on": ["self-hosted", "arm", "linux"]
           }
@@ -106,9 +104,6 @@ jobs:
         include: ${{ fromJson(needs.docker-targets.outputs.targets) }}
     env:
       ARCH: ${{ matrix.arch }}
-      # By default, use Docker CLI because docker-compose v1 is obsolete,
-      # except where the Docker client version is too old.
-      ARCHERY_USE_LEGACY_DOCKER_COMPOSE: ${{ matrix.archery-use-legacy-docker-compose || '0' }}
       GO: ${{ matrix.go }}
     steps:
       - name: Checkout Arrow
diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml
index 7cbd5f05dab4a..bdbed1bd678e6 100644
--- a/dev/tasks/java-jars/github.yml
+++ b/dev/tasks/java-jars/github.yml
@@ -30,7 +30,6 @@ jobs:
       ARCH: {{ '${{ matrix.platform.archery_arch }}' }}
       ARCH_ALIAS: {{ '${{ matrix.platform.archery_arch_alias }}' }}
       ARCH_SHORT: {{ '${{ matrix.platform.archery_arch_short }}' }}
-      ARCHERY_USE_LEGACY_DOCKER_COMPOSE: {{ "${{matrix.platform.archery_use_legacy_docker_compose || '0'}}" }}
     strategy:
       fail-fast: false
       matrix:
@@ -45,7 +44,6 @@ jobs:
             archery_arch: "arm64v8"
             archery_arch_alias: "aarch64"
             archery_arch_short: "arm64"
-            archery_use_legacy_docker_compose: "1"
     steps:
       {{ macros.github_checkout_arrow()|indent }}
       {{ macros.github_free_space()|indent }}
diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml
index 4bf2295ef3e95..cce976cd60e4e 100644
--- a/dev/tasks/linux-packages/github.linux.yml
+++ b/dev/tasks/linux-packages/github.linux.yml
@@ -29,7 +29,6 @@ jobs:
     {% endif %}
     env:
       ARCHITECTURE: {{ architecture }}
-      ARCHERY_USE_LEGACY_DOCKER_COMPOSE: {{ '1' if architecture == 'arm64' else '0' }}
     steps:
       {{ macros.github_checkout_arrow()|indent }}
       {{ macros.github_login_dockerhub()|indent }}
diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml
index 2854d4349fb7c..97746ba3f9b8b 100644
--- a/dev/tasks/python-wheels/github.linux.yml
+++ b/dev/tasks/python-wheels/github.linux.yml
@@ -33,7 +33,6 @@ jobs:
       ARCH: amd64
       {% else %}
       ARCH: arm64v8
-      ARCHERY_USE_LEGACY_DOCKER_COMPOSE: 1
       {% endif %}
       PYTHON: "{{ python_version }}"
       {% if python_version == "3.13" %}

From 75ca5b3631144f58ea3edbe6b4933a686c0e0fd9 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 28 Aug 2024 05:47:43 +0900
Subject: [PATCH 27/63] GH-43805: [C++] Enable filesystem automatically when
 one of ARROW_{AZURE,GCS,HDFS,S3}=ON is specified (#43806)

### Rationale for this change

`ARROW_{AZURE,GCS,HDFS,S3}=ON` are meaningful only when `ARROW_FILESYSTEM` is enabled. If the user specifies one of them, we can assume that the user also wants `ARROW_FILESYSTEM` enabled.

### What changes are included in this PR?

Enable `ARROW_FILESYSTEM` when one of `ARROW_{AZURE,GCS,HDFS,S3}=ON` is specified.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

`ARROW_FILESYSTEM` is enabled automatically with one of `ARROW_{AZURE,GCS,HDFS,S3}=ON`.
* GitHub Issue: #43805

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/cmake_modules/DefineOptions.cmake | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index 41466a1c22404..755887314d110 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -303,7 +303,10 @@ takes precedence over ccache if a storage backend is configured" ON)
                 ARROW_IPC)
 
   define_option(ARROW_AZURE
-                "Build Arrow with Azure support (requires the Azure SDK for C++)" OFF)
+                "Build Arrow with Azure support (requires the Azure SDK for C++)"
+                OFF
+                DEPENDS
+                ARROW_FILESYSTEM)
 
   define_option(ARROW_BUILD_UTILITIES "Build Arrow commandline utilities" OFF)
 
@@ -346,9 +349,16 @@ takes precedence over ccache if a storage backend is configured" ON)
                 ARROW_WITH_UTF8PROC)
 
   define_option(ARROW_GCS
-                "Build Arrow with GCS support (requires the GCloud SDK for C++)" OFF)
+                "Build Arrow with GCS support (requires the GCloud SDK for C++)"
+                OFF
+                DEPENDS
+                ARROW_FILESYSTEM)
 
-  define_option(ARROW_HDFS "Build the Arrow HDFS bridge" OFF)
+  define_option(ARROW_HDFS
+                "Build the Arrow HDFS bridge"
+                OFF
+                DEPENDS
+                ARROW_FILESYSTEM)
 
   define_option(ARROW_IPC "Build the Arrow IPC extensions" ON)
 
@@ -398,7 +408,11 @@ takes precedence over ccache if a storage backend is configured" ON)
                 ARROW_HDFS
                 ARROW_JSON)
 
-  define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF)
+  define_option(ARROW_S3
+                "Build Arrow with S3 support (requires the AWS SDK for C++)"
+                OFF
+                DEPENDS
+                ARROW_FILESYSTEM)
 
   define_option(ARROW_SKYHOOK
                 "Build the Skyhook libraries"

From 09bb24a5cdf5b6e73334e9a8b521f0188d940c73 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon <vibhatha@users.noreply.github.com>
Date: Wed, 28 Aug 2024 06:13:31 +0530
Subject: [PATCH 28/63] MINOR: [Java] Logback dependency upgrade (#43842)

### Rationale for this change

Fusing https://github.com/apache/arrow/pull/43752 and https://github.com/apache/arrow/pull/43827 dependabot PRs into a single PR.

### What changes are included in this PR?

Keeping a single version for both `logback-classic` and `logback-core`.

### Are these changes tested?

N/A

### Are there any user-facing changes?

No

Authored-by: Vibhatha Lakmal Abeykoon <vibhatha@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/memory/memory-netty/pom.xml |  1 -
 java/pom.xml                     | 13 ++++++++++++-
 java/tools/pom.xml               |  1 -
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml
index f2d4d2d0fe3bc..6cf573dd4d381 100644
--- a/java/memory/memory-netty/pom.xml
+++ b/java/memory/memory-netty/pom.xml
@@ -56,7 +56,6 @@ under the License.
     <dependency>
       <groupId>ch.qos.logback</groupId>
       <artifactId>logback-core</artifactId>
-      <version>1.3.14</version>
       <scope>test</scope>
     </dependency>
     <dependency>
diff --git a/java/pom.xml b/java/pom.xml
index f78d02c0c650f..577f23e6a719c 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -111,6 +111,7 @@ under the License.
     <mockito.core.version>5.11.0</mockito.core.version>
     <mockito.inline.version>5.2.0</mockito.inline.version>
     <checker.framework.version>3.46.0</checker.framework.version>
+    <logback.version>1.5.7</logback.version>
     <doclint>none</doclint>
     <additionalparam>-Xdoclint:none</additionalparam>
     <!-- List of add-opens arg line arguments for tests -->
@@ -221,6 +222,16 @@ under the License.
         <type>pom</type>
         <scope>import</scope>
       </dependency>
+      <dependency>
+        <groupId>ch.qos.logback</groupId>
+        <artifactId>logback-classic</artifactId>
+        <version>${logback.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>ch.qos.logback</groupId>
+        <artifactId>logback-core</artifactId>
+        <version>${logback.version}</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
 
@@ -274,7 +285,7 @@ under the License.
     <dependency>
       <groupId>ch.qos.logback</groupId>
       <artifactId>logback-classic</artifactId>
-      <version>1.4.14</version>
+      <version>${logback.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index 94566495dff19..082f06860c61b 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -59,7 +59,6 @@ under the License.
     <dependency>
       <groupId>ch.qos.logback</groupId>
       <artifactId>logback-classic</artifactId>
-      <version>1.4.14</version>
       <scope>test</scope>
     </dependency>
     <!--

From 9c801bbb9de55591ec026719c45180be0363f7e6 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 28 Aug 2024 09:50:20 +0900
Subject: [PATCH 29/63] MINOR: [Java] Bump commons-cli:commons-cli from 1.8.0
 to 1.9.0 in /java (#43825)

Bumps commons-cli:commons-cli from 1.8.0 to 1.9.0.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=commons-cli:commons-cli&package-manager=maven&previous-version=1.8.0&new-version=1.9.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/flight/flight-integration-tests/pom.xml | 2 +-
 java/flight/flight-sql/pom.xml               | 2 +-
 java/tools/pom.xml                           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml
index a154062ba814d..7da5156404dba 100644
--- a/java/flight/flight-integration-tests/pom.xml
+++ b/java/flight/flight-integration-tests/pom.xml
@@ -58,7 +58,7 @@ under the License.
     <dependency>
       <groupId>commons-cli</groupId>
       <artifactId>commons-cli</artifactId>
-      <version>1.8.0</version>
+      <version>1.9.0</version>
     </dependency>
     <dependency>
       <groupId>org.slf4j</groupId>
diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml
index c9c589d202ac6..92bab5e206757 100644
--- a/java/flight/flight-sql/pom.xml
+++ b/java/flight/flight-sql/pom.xml
@@ -118,7 +118,7 @@ under the License.
     <dependency>
       <groupId>commons-cli</groupId>
       <artifactId>commons-cli</artifactId>
-      <version>1.8.0</version>
+      <version>1.9.0</version>
       <optional>true</optional>
     </dependency>
   </dependencies>
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index 082f06860c61b..d261496040b78 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -54,7 +54,7 @@ under the License.
     <dependency>
       <groupId>commons-cli</groupId>
       <artifactId>commons-cli</artifactId>
-      <version>1.8.0</version>
+      <version>1.9.0</version>
     </dependency>
     <dependency>
       <groupId>ch.qos.logback</groupId>

From 6b268f62a8a172249ef35f093009c740c32e1f36 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 28 Aug 2024 09:50:36 +0900
Subject: [PATCH 30/63] MINOR: [Java] Bump
 com.google.api.grpc:proto-google-common-protos from 2.42.0 to 2.43.0 in /java
 (#43824)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [com.google.api.grpc:proto-google-common-protos](https://github.com/googleapis/sdk-platform-java) from 2.42.0 to 2.43.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/googleapis/sdk-platform-java/releases">com.google.api.grpc:proto-google-common-protos's releases</a>.</em></p>
<blockquote>
<h2>v2.43.0</h2>
<h2><a href="https://github.com/googleapis/sdk-platform-java/compare/v2.42.0...v2.43.0">2.43.0</a> (2024-07-25)</h2>
<h3>Features</h3>
<ul>
<li>add <code>transport</code> option to <code>generation_config.yaml</code> (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3052">#3052</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/3b1a91551ab6bbaf6a46950e1677c15cdd70d2e9">3b1a915</a>)</li>
<li>get released version from versions.txt to render <code>README.md</code> (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3007">#3007</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/99bb2b339eadd480dcc1753d4ba3aeda3b5c64de">99bb2b3</a>)</li>
<li>Introduce java.time to Gax-Java (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/1872">#1872</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/308aeafc9f04795d2e1df8206c84689b11c4323a">308aeaf</a>)</li>
<li>Mark <code>getDefaultEndpoint()</code> with <a href="https://github.com/ObsoleteApi"><code>@​ObsoleteApi</code></a> (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2347">#2347</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/e46648f112a149f967783539d30b4c44474b39fe">e46648f</a>)</li>
<li>parse <code>BUILD.bzel</code> to determine whether a commit that only changed <code>BUILD.bazel</code> is a qualified commit (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2937">#2937</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/502f80101dec191befb660a1aba6d0c354758c18">502f801</a>)</li>
</ul>
<h3>Bug Fixes</h3>
<ul>
<li>Fix:  (<a href="https://github.com/googleapis/sdk-platform-java/commit/d996c2dfb4b1cb115e0a2cd117eebd8a4ab41cad">d996c2d</a>)</li>
<li><code>BaseApiTracer</code> to noop on attemptFailed via overloaded method call (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3016">#3016</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/2fc938a819f4a2da9cfd25d2d306b62f53fa1f91">2fc938a</a>)</li>
<li>Generator to skip generation for empty services. (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3051">#3051</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/ff2c48543940bb0ceb78392b0f5af67568823002">ff2c485</a>)</li>
<li>restore hermetic build image publication (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2952">#2952</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/97a6d678569b7d8768ff83fe5370d8966a06ca95">97a6d67</a>)</li>
</ul>
<h3>Dependencies</h3>
<ul>
<li>update dependency com.fasterxml.jackson:jackson-bom to v2.17.2 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3028">#3028</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/d16f9d114a75fb8a77dfc39edf6fe2aa2f967704">d16f9d1</a>)</li>
<li>update dependency com.google.cloud.opentelemetry:detector-resources-support to v0.30.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2975">#2975</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/b3ec93f1925ff5a92b47200a61303e5561dbb1b8">b3ec93f</a>)</li>
<li>update dependency com.google.cloud.opentelemetry:detector-resources-support to v0.31.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3044">#3044</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/6bd07dc9fb589c72cf7b86bb2e0137687e1f61f2">6bd07dc</a>)</li>
<li>update dependency com.google.errorprone:error_prone_annotations to v2.29.2 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3058">#3058</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/8ea0868e9e67a4c58075b98de0cf7b51635ea2f8">8ea0868</a>)</li>
<li>update dependency com.google.errorprone:error_prone_annotations to v2.29.2 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3059">#3059</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/81b23dc88eeff492f6cef6328ce3b5d32992f500">81b23dc</a>)</li>
<li>update dependency com.google.guava:guava to v33.2.1-jre (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3027">#3027</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/12ee456727d0cd9d86aeadd65e633b5d7abb3d50">12ee456</a>)</li>
<li>update dependency commons-codec:commons-codec to v1.17.1 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3049">#3049</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/58d94b795db06fa76099c871501d2a1f7465633b">58d94b7</a>)</li>
<li>update dependency dev.cel:cel to v0.6.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3050">#3050</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/bc332d95919c0a1909e43f4ab7c7fe4db406697e">bc332d9</a>)</li>
<li>update dependency net.bytebuddy:byte-buddy to v1.14.18 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3029">#3029</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/8799cf602a3204a4adeaf4f48000979e49107959">8799cf6</a>)</li>
<li>update dependency org.apache.commons:commons-lang3 to v3.15.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3060">#3060</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/2538334aff96a4ad70a26bac2141d3235856b1a1">2538334</a>)</li>
<li>update dependency org.checkerframework:checker-qual to v3.45.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2988">#2988</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/4edd216163662008ee1060b6eb82ca673045826f">4edd216</a>)</li>
<li>update google api dependencies (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2951">#2951</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/c16f6c95636b4997861ef3914b06f7819a8bd69a">c16f6c9</a>)</li>
<li>update google auth library dependencies to v1.24.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3039">#3039</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/98b5bd7d2ddb98c7e52bffd0b93c5661a1c9d39b">98b5bd7</a>)</li>
<li>update googleapis/java-cloud-bom digest to 47c5dbc (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2974">#2974</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/57623f08441969e0ff0170a72779fb8425ff6592">57623f0</a>)</li>
<li>update grpc dependencies to v1.65.1 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3061">#3061</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/27497e215cda4e8ad17fce2faa794b600edfc4cd">27497e2</a>)</li>
<li>update junit5 monorepo to v5.10.3 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2963">#2963</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/bc55fe1fe55876ee3b4843cefb05ee401c323865">bc55fe1</a>)</li>
<li>update netty dependencies to v4.1.112.final (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3057">#3057</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/5af127be3d9dadcdf0d9a5519ce6ad3b2e3bb481">5af127b</a>)</li>
<li>update opentelemetry-java monorepo to v1.40.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3035">#3035</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/5c31c4211993f25d2c352ef8f3e085187bc5fd30">5c31c42</a>)</li>
<li>Use Gapic-Showcase v0.35.1 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3018">#3018</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/43773f0cf2418051b2c0e6245100973b8ce2152e">43773f0</a>)</li>
</ul>
<h3>Documentation</h3>
<ul>
<li>add support option to 'new issue' choices (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3055">#3055</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/6a2a17d1d84da9d45a4be6675ea6ca0235b42c99">6a2a17d</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/googleapis/sdk-platform-java/blob/main/CHANGELOG.md">com.google.api.grpc:proto-google-common-protos's changelog</a>.</em></p>
<blockquote>
<h2><a href="https://github.com/googleapis/sdk-platform-java/compare/v2.42.0...v2.43.0">2.43.0</a> (2024-07-25)</h2>
<h3>Features</h3>
<ul>
<li>add <code>transport</code> option to <code>generation_config.yaml</code> (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3052">#3052</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/3b1a91551ab6bbaf6a46950e1677c15cdd70d2e9">3b1a915</a>)</li>
<li>get released version from versions.txt to render <code>README.md</code> (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3007">#3007</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/99bb2b339eadd480dcc1753d4ba3aeda3b5c64de">99bb2b3</a>)</li>
<li>Introduce java.time to Gax-Java (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/1872">#1872</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/308aeafc9f04795d2e1df8206c84689b11c4323a">308aeaf</a>)</li>
<li>Mark <code>getDefaultEndpoint()</code> with <a href="https://github.com/ObsoleteApi"><code>@​ObsoleteApi</code></a> (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2347">#2347</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/e46648f112a149f967783539d30b4c44474b39fe">e46648f</a>)</li>
<li>parse <code>BUILD.bzel</code> to determine whether a commit that only changed <code>BUILD.bazel</code> is a qualified commit (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2937">#2937</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/502f80101dec191befb660a1aba6d0c354758c18">502f801</a>)</li>
</ul>
<h3>Bug Fixes</h3>
<ul>
<li>Fix:  (<a href="https://github.com/googleapis/sdk-platform-java/commit/d996c2dfb4b1cb115e0a2cd117eebd8a4ab41cad">d996c2d</a>)</li>
<li><code>BaseApiTracer</code> to noop on attemptFailed via overloaded method call (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3016">#3016</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/2fc938a819f4a2da9cfd25d2d306b62f53fa1f91">2fc938a</a>)</li>
<li>Generator to skip generation for empty services. (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3051">#3051</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/ff2c48543940bb0ceb78392b0f5af67568823002">ff2c485</a>)</li>
<li>restore hermetic build image publication (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2952">#2952</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/97a6d678569b7d8768ff83fe5370d8966a06ca95">97a6d67</a>)</li>
</ul>
<h3>Dependencies</h3>
<ul>
<li>update dependency com.fasterxml.jackson:jackson-bom to v2.17.2 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3028">#3028</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/d16f9d114a75fb8a77dfc39edf6fe2aa2f967704">d16f9d1</a>)</li>
<li>update dependency com.google.cloud.opentelemetry:detector-resources-support to v0.30.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2975">#2975</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/b3ec93f1925ff5a92b47200a61303e5561dbb1b8">b3ec93f</a>)</li>
<li>update dependency com.google.cloud.opentelemetry:detector-resources-support to v0.31.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3044">#3044</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/6bd07dc9fb589c72cf7b86bb2e0137687e1f61f2">6bd07dc</a>)</li>
<li>update dependency com.google.errorprone:error_prone_annotations to v2.29.2 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3058">#3058</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/8ea0868e9e67a4c58075b98de0cf7b51635ea2f8">8ea0868</a>)</li>
<li>update dependency com.google.errorprone:error_prone_annotations to v2.29.2 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3059">#3059</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/81b23dc88eeff492f6cef6328ce3b5d32992f500">81b23dc</a>)</li>
<li>update dependency com.google.guava:guava to v33.2.1-jre (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3027">#3027</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/12ee456727d0cd9d86aeadd65e633b5d7abb3d50">12ee456</a>)</li>
<li>update dependency commons-codec:commons-codec to v1.17.1 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3049">#3049</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/58d94b795db06fa76099c871501d2a1f7465633b">58d94b7</a>)</li>
<li>update dependency dev.cel:cel to v0.6.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3050">#3050</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/bc332d95919c0a1909e43f4ab7c7fe4db406697e">bc332d9</a>)</li>
<li>update dependency net.bytebuddy:byte-buddy to v1.14.18 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3029">#3029</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/8799cf602a3204a4adeaf4f48000979e49107959">8799cf6</a>)</li>
<li>update dependency org.apache.commons:commons-lang3 to v3.15.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3060">#3060</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/2538334aff96a4ad70a26bac2141d3235856b1a1">2538334</a>)</li>
<li>update dependency org.checkerframework:checker-qual to v3.45.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2988">#2988</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/4edd216163662008ee1060b6eb82ca673045826f">4edd216</a>)</li>
<li>update google api dependencies (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2951">#2951</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/c16f6c95636b4997861ef3914b06f7819a8bd69a">c16f6c9</a>)</li>
<li>update google auth library dependencies to v1.24.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3039">#3039</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/98b5bd7d2ddb98c7e52bffd0b93c5661a1c9d39b">98b5bd7</a>)</li>
<li>update googleapis/java-cloud-bom digest to 47c5dbc (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2974">#2974</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/57623f08441969e0ff0170a72779fb8425ff6592">57623f0</a>)</li>
<li>update grpc dependencies to v1.65.1 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3061">#3061</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/27497e215cda4e8ad17fce2faa794b600edfc4cd">27497e2</a>)</li>
<li>update junit5 monorepo to v5.10.3 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2963">#2963</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/bc55fe1fe55876ee3b4843cefb05ee401c323865">bc55fe1</a>)</li>
<li>update netty dependencies to v4.1.112.final (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3057">#3057</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/5af127be3d9dadcdf0d9a5519ce6ad3b2e3bb481">5af127b</a>)</li>
<li>update opentelemetry-java monorepo to v1.40.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3035">#3035</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/5c31c4211993f25d2c352ef8f3e085187bc5fd30">5c31c42</a>)</li>
<li>Use Gapic-Showcase v0.35.1 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3018">#3018</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/43773f0cf2418051b2c0e6245100973b8ce2152e">43773f0</a>)</li>
</ul>
<h3>Documentation</h3>
<ul>
<li>add support option to 'new issue' choices (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3055">#3055</a>) (<a href="https://github.com/googleapis/sdk-platform-java/commit/6a2a17d1d84da9d45a4be6675ea6ca0235b42c99">6a2a17d</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/5e6da42ecfc5818d53a3053614a71680b482484f"><code>5e6da42</code></a> chore(main): release 2.43.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/2953">#2953</a>)</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/10f950edf9f5ddc293d102bb46bae7aecdae6b98"><code>10f950e</code></a> chore: make generator version an optional param (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3040">#3040</a>)</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/43de5b568dc0bbdaddf2419652e0cb16de77ddb6"><code>43de5b5</code></a> build(deps): update dependency com.google.cloud:google-cloud-shared-config to...</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/941f08c6e58cf1662ffce195cd7f351992ae9806"><code>941f08c</code></a> chore: Add OpenTelemetry semantic conventions to shared dependencies (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3020">#3020</a>)</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/27497e215cda4e8ad17fce2faa794b600edfc4cd"><code>27497e2</code></a> deps: update grpc dependencies to v1.65.1 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3061">#3061</a>)</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/ff2c48543940bb0ceb78392b0f5af67568823002"><code>ff2c485</code></a> fix: Generator to skip generation for empty services. (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3051">#3051</a>)</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/5af127be3d9dadcdf0d9a5519ce6ad3b2e3bb481"><code>5af127b</code></a> deps: update netty dependencies to v4.1.112.final (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3057">#3057</a>)</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/8ea0868e9e67a4c58075b98de0cf7b51635ea2f8"><code>8ea0868</code></a> deps: update dependency com.google.errorprone:error_prone_annotations to v2.2...</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/81b23dc88eeff492f6cef6328ce3b5d32992f500"><code>81b23dc</code></a> deps: update dependency com.google.errorprone:error_prone_annotations to v2.2...</li>
<li><a href="https://github.com/googleapis/sdk-platform-java/commit/2538334aff96a4ad70a26bac2141d3235856b1a1"><code>2538334</code></a> deps: update dependency org.apache.commons:commons-lang3 to v3.15.0 (<a href="https://redirect.github.com/googleapis/sdk-platform-java/issues/3060">#3060</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/googleapis/sdk-platform-java/compare/v2.42.0...v2.43.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.google.api.grpc:proto-google-common-protos&package-manager=maven&previous-version=2.42.0&new-version=2.43.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/flight/flight-core/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml
index e4d1d5d3885a5..dec679de3a543 100644
--- a/java/flight/flight-core/pom.xml
+++ b/java/flight/flight-core/pom.xml
@@ -134,7 +134,7 @@ under the License.
     <dependency>
       <groupId>com.google.api.grpc</groupId>
       <artifactId>proto-google-common-protos</artifactId>
-      <version>2.42.0</version>
+      <version>2.43.0</version>
       <scope>test</scope>
     </dependency>
     <dependency>

From 9d40a6a6630f951b9ccf8e8984c58dc0602921eb Mon Sep 17 00:00:00 2001
From: "yihao.dai" <954206947@qq.com>
Date: Wed, 28 Aug 2024 23:32:10 +0800
Subject: [PATCH 31/63] GH-43860: [Go][Parquet] Handle the error correctly
 (#43861)

### Rationale for this change
Fixes: https://github.com/apache/arrow/issues/43860

### What changes are included in this PR?
Return the reader's error (`rr.Err()`) from `recordReader.ReadRecords` instead of always returning `nil`.

### Are these changes tested?
Yes

### Are there any user-facing changes?
Nope

* GitHub Issue: #43860

Authored-by: bigsheeper <yihao.dai@zilliz.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
---
 go/parquet/file/file_reader_test.go | 49 +++++++++++++++++++++++++++++
 go/parquet/file/record_reader.go    |  2 +-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go
index 35f4da4e8667c..74926c958e2f7 100644
--- a/go/parquet/file/file_reader_test.go
+++ b/go/parquet/file/file_reader_test.go
@@ -452,6 +452,55 @@ func TestRleBooleanEncodingFileRead(t *testing.T) {
 	assert.Equal(t, expected, values[:len(expected)])
 }
 
+type mockBadReader struct {
+	cnt    int
+	reader *os.File
+}
+
+func (m *mockBadReader) Seek(offset int64, whence int) (int64, error) {
+	return m.reader.Seek(offset, whence)
+}
+
+func (m *mockBadReader) ReadAt(p []byte, off int64) (n int, err error) {
+	if m.cnt == 0 {
+		return 0, fmt.Errorf("mock error")
+	}
+	m.cnt--
+	return m.reader.ReadAt(p, off)
+}
+
+func TestBadReader(t *testing.T) {
+	dir := os.Getenv("PARQUET_TEST_DATA")
+	if dir == "" {
+		t.Skip("no path supplied with PARQUET_TEST_DATA")
+	}
+	require.DirExists(t, dir)
+
+	filePath := path.Join(dir, "byte_stream_split_extended.gzip.parquet")
+	f, err := os.Open(filePath)
+	assert.NoError(t, err)
+	defer f.Close()
+
+	reader := &mockBadReader{
+		cnt:    2,
+		reader: f,
+	}
+	r, err := file.NewParquetReader(reader, file.WithReadProps(&parquet.ReaderProperties{
+		BufferSize:            int64(1024),
+		BufferedStreamEnabled: true,
+	}))
+	assert.NoError(t, err)
+
+	fileReader, err := pqarrow.NewFileReader(r, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
+	assert.NoError(t, err)
+
+	columnReader, err := fileReader.GetColumn(context.Background(), 0)
+	assert.NoError(t, err)
+
+	_, err = columnReader.NextBatch(1)
+	assert.ErrorContains(t, err, "mock error") // Expect an error to occur.
+}
+
 func TestByteStreamSplitEncodingFileRead(t *testing.T) {
 	dir := os.Getenv("PARQUET_TEST_DATA")
 	if dir == "" {
diff --git a/go/parquet/file/record_reader.go b/go/parquet/file/record_reader.go
index 667ffca77a8d1..765f4a9d34b33 100755
--- a/go/parquet/file/record_reader.go
+++ b/go/parquet/file/record_reader.go
@@ -645,7 +645,7 @@ func (rr *recordReader) ReadRecords(numRecords int64) (int64, error) {
 		}
 	}
 
-	return recordsRead, nil
+	return recordsRead, rr.Err()
 }
 
 func (rr *recordReader) ReleaseValidBits() *memory.Buffer {

From 0bc91dd2447696a208adec266270ab722099b0e2 Mon Sep 17 00:00:00 2001
From: Felipe Oliveira Carvalho <felipekde@gmail.com>
Date: Wed, 28 Aug 2024 15:07:02 -0300
Subject: [PATCH 32/63] GH-43854: [C++] Expose the set of device types where a
 ChunkedArray is allocated (#43853)

### Rationale for this change

`ChunkedArray`s allow flexible allocation of arrays -- the whole array doesn't have to be allocated in huge contiguous buffers. Nothing today prevents chunked arrays from being made of chunks allocated on different devices, and that is good. But we need a way to query the set of devices on which a chunked array is allocated. This PR adds that missing part.

### What changes are included in this PR?

Addition of:
- the `DeviceAllocationTypeSet` class
- `ChunkedArray::device_types()`
- `Datum::device_types()`

Moved `enum DeviceAllocationType` to the `type_fwd.h` header because `device.h` is too expensive a header to include just for this widely used `enum`.

### Are these changes tested?

Added more asserts to `chunked_array_test.cc`.

### Are there any user-facing changes?

New APIs.
* GitHub Issue: #43854

Authored-by: Felipe Oliveira Carvalho <felipekde@gmail.com>
Signed-off-by: Felipe Oliveira Carvalho <felipekde@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt                |  1 +
 cpp/src/arrow/chunked_array.cc              | 13 +++
 cpp/src/arrow/chunked_array.h               |  8 ++
 cpp/src/arrow/chunked_array_test.cc         |  5 ++
 cpp/src/arrow/compute/function.cc           |  1 +
 cpp/src/arrow/compute/kernel.cc             |  1 +
 cpp/src/arrow/compute/kernel.h              |  1 +
 cpp/src/arrow/datum.cc                      | 40 +++++++++
 cpp/src/arrow/datum.h                       |  3 +
 cpp/src/arrow/device.h                      | 18 ----
 cpp/src/arrow/device_allocation_type_set.cc | 80 +++++++++++++++++
 cpp/src/arrow/device_allocation_type_set.h  | 97 +++++++++++++++++++++
 cpp/src/arrow/type_fwd.h                    | 21 +++++
 13 files changed, 271 insertions(+), 18 deletions(-)
 create mode 100644 cpp/src/arrow/device_allocation_type_set.cc
 create mode 100644 cpp/src/arrow/device_allocation_type_set.h

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 6b0ac8c23c75a..65343df1291ba 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -373,6 +373,7 @@ set(ARROW_SRCS
     config.cc
     datum.cc
     device.cc
+    device_allocation_type_set.cc
     extension_type.cc
     extension/bool8.cc
     extension/uuid.cc
diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index c36b736d5d5df..dd6aa51534fcb 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -27,6 +27,7 @@
 #include "arrow/array/array_nested.h"
 #include "arrow/array/util.h"
 #include "arrow/array/validate.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/pretty_print.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
@@ -86,6 +87,18 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::MakeEmpty(
   return std::make_shared<ChunkedArray>(std::move(new_chunks));
 }
 
+DeviceAllocationTypeSet ChunkedArray::device_types() const {
+  if (chunks_.empty()) {
+    // An empty ChunkedArray is considered to be CPU-only.
+    return DeviceAllocationTypeSet::CpuOnly();
+  }
+  DeviceAllocationTypeSet set;
+  for (const auto& chunk : chunks_) {
+    set.add(chunk->device_type());
+  }
+  return set;
+}
+
 bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) const {
   if (length_ != other.length()) {
     return false;
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 5d300861d85c2..c65b6cb6e227f 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -25,6 +25,7 @@
 
 #include "arrow/chunk_resolver.h"
 #include "arrow/compare.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
@@ -116,6 +117,13 @@ class ARROW_EXPORT ChunkedArray {
   /// \return an ArrayVector of chunks
   const ArrayVector& chunks() const { return chunks_; }
 
+  /// \return The set of device allocation types used by the chunks in this
+  /// chunked array.
+  DeviceAllocationTypeSet device_types() const;
+
+  /// \return true if all chunks are allocated on CPU-accessible memory.
+  bool is_cpu() const { return device_types().is_cpu_only(); }
+
   /// \brief Construct a zero-copy slice of the chunked array with the
   /// indicated offset and length
   ///
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index e9cc283b53cd5..b796e9250008a 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -61,12 +61,17 @@ TEST_F(TestChunkedArray, Make) {
                        ChunkedArray::Make({}, int64()));
   AssertTypeEqual(*int64(), *result->type());
   ASSERT_EQ(result->num_chunks(), 0);
+  // Empty chunked arrays are treated as CPU-allocated.
+  ASSERT_TRUE(result->is_cpu());
 
   auto chunk0 = ArrayFromJSON(int8(), "[0, 1, 2]");
   auto chunk1 = ArrayFromJSON(int16(), "[3, 4, 5]");
 
   ASSERT_OK_AND_ASSIGN(result, ChunkedArray::Make({chunk0, chunk0}));
   ASSERT_OK_AND_ASSIGN(auto result2, ChunkedArray::Make({chunk0, chunk0}, int8()));
+  // All chunks are CPU-accessible.
+  ASSERT_TRUE(result->is_cpu());
+  ASSERT_TRUE(result2->is_cpu());
   AssertChunkedEqual(*result, *result2);
 
   ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0, chunk1}));
diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index e1a2e8c5d8879..0478a3d1e801a 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -30,6 +30,7 @@
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/datum.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/tracing_internal.h"
diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc
index 5c87ef2cd0561..5e7461cc52d0e 100644
--- a/cpp/src/arrow/compute/kernel.cc
+++ b/cpp/src/arrow/compute/kernel.cc
@@ -24,6 +24,7 @@
 
 #include "arrow/buffer.h"
 #include "arrow/compute/exec.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/result.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_util.h"
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 1adb3e96c97c8..cfa1cd8193f36 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -31,6 +31,7 @@
 #include "arrow/buffer.h"
 #include "arrow/compute/exec.h"
 #include "arrow/datum.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/memory_pool.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
diff --git a/cpp/src/arrow/datum.cc b/cpp/src/arrow/datum.cc
index 2ac230232e1b7..b19d186447547 100644
--- a/cpp/src/arrow/datum.cc
+++ b/cpp/src/arrow/datum.cc
@@ -25,6 +25,7 @@
 #include "arrow/array/array_base.h"
 #include "arrow/array/util.h"
 #include "arrow/chunked_array.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/record_batch.h"
 #include "arrow/scalar.h"
 #include "arrow/table.h"
@@ -156,6 +157,45 @@ ArrayVector Datum::chunks() const {
   return this->chunked_array()->chunks();
 }
 
+DeviceAllocationTypeSet Datum::device_types() const {
+  switch (kind()) {
+    case NONE:
+      break;
+    case SCALAR:
+      // Scalars are assumed to always reside in CPU memory for now.
+      return DeviceAllocationTypeSet::CpuOnly();
+    case ARRAY:
+      return DeviceAllocationTypeSet{array()->device_type()};
+    case CHUNKED_ARRAY:
+      return chunked_array()->device_types();
+    case RECORD_BATCH: {
+      auto& columns = record_batch()->columns();
+      if (columns.empty()) {
+        // An empty RecordBatch is considered to be CPU-only.
+        return DeviceAllocationTypeSet::CpuOnly();
+      }
+      DeviceAllocationTypeSet set;
+      for (const auto& column : columns) {
+        set.add(column->device_type());
+      }
+      return set;
+    }
+    case TABLE: {
+      auto& columns = table()->columns();
+      if (columns.empty()) {
+        // An empty Table is considered to be CPU-only.
+        return DeviceAllocationTypeSet::CpuOnly();
+      }
+      DeviceAllocationTypeSet set;
+      for (const auto& column : columns) {
+        set.Add(column->device_types());
+      }
+      return set;
+    }
+  }
+  return {};
+}
+
 bool Datum::Equals(const Datum& other) const {
   if (this->kind() != other.kind()) return false;
 
diff --git a/cpp/src/arrow/datum.h b/cpp/src/arrow/datum.h
index 31b2d2274c900..4a88e7a81125c 100644
--- a/cpp/src/arrow/datum.h
+++ b/cpp/src/arrow/datum.h
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "arrow/array/data.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/scalar.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
@@ -295,6 +296,8 @@ struct ARROW_EXPORT Datum {
   /// \return empty if not arraylike
   ArrayVector chunks() const;
 
+  DeviceAllocationTypeSet device_types() const;
+
   /// \brief True if the two data are equal
   bool Equals(const Datum& other) const;
 
diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h
index f5cca0d27d7b2..1dbe5b4b13e89 100644
--- a/cpp/src/arrow/device.h
+++ b/cpp/src/arrow/device.h
@@ -32,24 +32,6 @@
 
 namespace arrow {
 
-/// \brief EXPERIMENTAL: Device type enum which matches up with C Data Device types
-enum class DeviceAllocationType : char {
-  kCPU = 1,
-  kCUDA = 2,
-  kCUDA_HOST = 3,
-  kOPENCL = 4,
-  kVULKAN = 7,
-  kMETAL = 8,
-  kVPI = 9,
-  kROCM = 10,
-  kROCM_HOST = 11,
-  kEXT_DEV = 12,
-  kCUDA_MANAGED = 13,
-  kONEAPI = 14,
-  kWEBGPU = 15,
-  kHEXAGON = 16,
-};
-
 class MemoryManager;
 
 /// \brief EXPERIMENTAL: Abstract interface for hardware devices
diff --git a/cpp/src/arrow/device_allocation_type_set.cc b/cpp/src/arrow/device_allocation_type_set.cc
new file mode 100644
index 0000000000000..83e9e57f2ee47
--- /dev/null
+++ b/cpp/src/arrow/device_allocation_type_set.cc
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+
+#include "arrow/device_allocation_type_set.h"
+#include "arrow/type_fwd.h"
+
+namespace arrow {
+
+const char* DeviceAllocationTypeToCStr(DeviceAllocationType type) {
+  switch (type) {
+    case DeviceAllocationType::kCPU:
+      return "CPU";
+    case DeviceAllocationType::kCUDA:
+      return "CUDA";
+    case DeviceAllocationType::kCUDA_HOST:
+      return "CUDA_HOST";
+    case DeviceAllocationType::kOPENCL:
+      return "OPENCL";
+    case DeviceAllocationType::kVULKAN:
+      return "VULKAN";
+    case DeviceAllocationType::kMETAL:
+      return "METAL";
+    case DeviceAllocationType::kVPI:
+      return "VPI";
+    case DeviceAllocationType::kROCM:
+      return "ROCM";
+    case DeviceAllocationType::kROCM_HOST:
+      return "ROCM_HOST";
+    case DeviceAllocationType::kEXT_DEV:
+      return "EXT_DEV";
+    case DeviceAllocationType::kCUDA_MANAGED:
+      return "CUDA_MANAGED";
+    case DeviceAllocationType::kONEAPI:
+      return "ONEAPI";
+    case DeviceAllocationType::kWEBGPU:
+      return "WEBGPU";
+    case DeviceAllocationType::kHEXAGON:
+      return "HEXAGON";
+  }
+  return "<UNKNOWN>";
+}
+
+std::string DeviceAllocationTypeSet::ToString() const {
+  std::string result = "{";
+  for (int i = 1; i <= kDeviceAllocationTypeMax; i++) {
+    if (device_type_bitset_.test(i)) {
+      // Skip all the unused values in the enum.
+      switch (i) {
+        case 0:
+        case 5:
+        case 6:
+          continue;
+      }
+      if (result.size() > 1) {
+        result += ", ";
+      }
+      result += DeviceAllocationTypeToCStr(static_cast<DeviceAllocationType>(i));
+    }
+  }
+  result += "}";
+  return result;
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/device_allocation_type_set.h b/cpp/src/arrow/device_allocation_type_set.h
new file mode 100644
index 0000000000000..974367307e6d4
--- /dev/null
+++ b/cpp/src/arrow/device_allocation_type_set.h
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <bitset>
+#include <string>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+ARROW_EXPORT
+const char* DeviceAllocationTypeToCStr(DeviceAllocationType type);
+
+class ARROW_EXPORT DeviceAllocationTypeSet {
+ private:
+  std::bitset<kDeviceAllocationTypeMax + 1> device_type_bitset_;
+
+ public:
+  /// \brief Construct an empty set of device types.
+  DeviceAllocationTypeSet() = default;
+
+  /// \brief Construct a set of device types with a single device type.
+  DeviceAllocationTypeSet(  // NOLINT implicit construction
+      DeviceAllocationType accepted_device_type) {
+    add(accepted_device_type);
+  }
+
+  /// \brief Construct a set of device types containing only "kCPU".
+  static DeviceAllocationTypeSet CpuOnly() {
+    return DeviceAllocationTypeSet{DeviceAllocationType::kCPU};
+  }
+
+  /// \brief Construct a set of device types containing all device types.
+  static DeviceAllocationTypeSet All() {
+    DeviceAllocationTypeSet all;
+    all.device_type_bitset_.set();
+    // Don't set the invalid enum values.
+    all.device_type_bitset_.reset(0);
+    all.device_type_bitset_.reset(5);
+    all.device_type_bitset_.reset(6);
+    return all;
+  }
+
+  /// \brief Add a device type to the set of device types.
+  void add(DeviceAllocationType device_type) {
+    device_type_bitset_.set(static_cast<int>(device_type));
+  }
+
+  /// \brief Remove a device type from the set of device types.
+  void remove(DeviceAllocationType device_type) {
+    device_type_bitset_.reset(static_cast<int>(device_type));
+  }
+
+  /// \brief Return true iff the set only contains the CPU device type.
+  bool is_cpu_only() const {
+    return device_type_bitset_ == CpuOnly().device_type_bitset_;
+  }
+
+  /// \brief Return true if the set of accepted device types includes the
+  /// device type.
+  bool contains(DeviceAllocationType device_type) const {
+    return device_type_bitset_.test(static_cast<int>(device_type));
+  }
+
+  /// \brief Add all device types from another set to this set.
+  void Add(DeviceAllocationTypeSet other) {
+    device_type_bitset_ |= other.device_type_bitset_;
+  }
+
+  /// \brief Return true if the set of accepted device types includes all the
+  /// device types in the other set.
+  bool Contains(DeviceAllocationTypeSet other) const {
+    // other \subseteq this <==> (other \intersect this == other)
+    return (other.device_type_bitset_ & device_type_bitset_) == other.device_type_bitset_;
+  }
+
+  std::string ToString() const;
+};
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 08777d247edbf..8faebe217f141 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -724,4 +724,25 @@ ARROW_EXPORT MemoryPool* default_memory_pool();
 
 constexpr int64_t kDefaultBufferAlignment = 64;
 
+/// \brief EXPERIMENTAL: Device type enum which matches up with C Data Device types
+enum class DeviceAllocationType : char {
+  kCPU = 1,
+  kCUDA = 2,
+  kCUDA_HOST = 3,
+  kOPENCL = 4,
+  kVULKAN = 7,
+  kMETAL = 8,
+  kVPI = 9,
+  kROCM = 10,
+  kROCM_HOST = 11,
+  kEXT_DEV = 12,
+  kCUDA_MANAGED = 13,
+  kONEAPI = 14,
+  kWEBGPU = 15,
+  kHEXAGON = 16,
+};
+constexpr int kDeviceAllocationTypeMax = 16;
+
+class DeviceAllocationTypeSet;
+
 }  // namespace arrow

From 58415d1fac50cb829b3dcf08526033d6db8c30db Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 29 Aug 2024 02:54:32 +0200
Subject: [PATCH 33/63] GH-38183: [CI][Python] Use pipx to install GCS
 testbench (#43852)

### Rationale for this change

Installing the GCS testbench using the same Python that's being used to test PyArrow is fragile: some testbench versions may not be compatible, or there could be conflicts among the dependencies of the respective libraries.

### What changes are included in this PR?

Use `pipx` to install the GCS testbench in a separate, controlled environment, using an appropriate Python version.

### Are these changes tested?

Yes, by CI.

### Are there any user-facing changes?

No.

* GitHub Issue: #38183

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/cpp.yml                     |  8 ++-
 appveyor.yml                                  |  1 +
 ci/appveyor-cpp-build.bat                     |  2 +
 ci/docker/conda-cpp.dockerfile                | 12 ++--
 ci/docker/conda-python.dockerfile             |  5 --
 ...ython-wheel-windows-test-vs2019.dockerfile | 27 +++++---
 ci/docker/ubuntu-20.04-cpp-minimal.dockerfile |  1 +
 ci/docker/ubuntu-22.04-cpp-minimal.dockerfile |  1 +
 ci/docker/ubuntu-24.04-cpp-minimal.dockerfile |  1 +
 ci/scripts/install_gcs_testbench.bat          | 13 +++-
 ci/scripts/install_gcs_testbench.sh           | 20 +++---
 ci/scripts/python_wheel_windows_test.bat      | 40 ++++++-----
 cpp/src/arrow/filesystem/gcsfs_test.cc        | 68 +++++++++----------
 python/pyarrow/tests/conftest.py              |  7 +-
 python/scripts/run_emscripten_tests.py        |  2 +-
 r/tests/testthat/test-gcs.R                   |  4 +-
 16 files changed, 122 insertions(+), 90 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index c5482f730823b..fd23e0cf217e6 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -465,15 +465,17 @@ jobs:
           chmod +x /usr/local/bin/minio.exe
       - name: Set up Python
         uses: actions/setup-python@v5.1.1
+        id: python-install
         with:
           python-version: 3.9
       - name: Install Google Cloud Storage Testbench
-        shell: bash
+        shell: msys2 {0}
+        env:
+          PIPX_BIN_DIR: /usr/local/bin
+          PIPX_PYTHON: ${{ steps.python-install.outputs.python-path }}
         run: |
           ci/scripts/install_gcs_testbench.sh default
-          echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV
       - name: Test
         shell: msys2 {0}
         run: |
-          PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}"
           ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build"
diff --git a/appveyor.yml b/appveyor.yml
index 5954251d34733..9e4582f1d8d7f 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -24,6 +24,7 @@ only_commits:
     - appveyor.yml
     - ci/appveyor*
     - ci/conda*
+    - ci/scripts/*.bat
     - cpp/
     - format/
     - python/
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index f688fbb63a9ad..08a052e82f24d 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -46,7 +46,9 @@ set ARROW_CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON
 set ARROW_CXXFLAGS=/WX /MP
 
 @rem Install GCS testbench
+set PIPX_BIN_DIR=C:\Windows\
 call %CD%\ci\scripts\install_gcs_testbench.bat
+storage-testbench -h || exit /B
 
 @rem
 @rem Build and test Arrow C++ libraries (including Parquet)
diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index dff1f2224809a..eb035d887a158 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -42,17 +42,19 @@ RUN mamba install -q -y \
         valgrind && \
     mamba clean --all
 
+# We want to install the GCS testbench using the Conda base environment's Python,
+# because the test environment's Python may later change.
+ENV PIPX_PYTHON=/opt/conda/bin/python3
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
+RUN /arrow/ci/scripts/install_gcs_testbench.sh default
+
 # Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to 
-# be on the path for the tests to run.  
+# be on the path for the tests to run.
 ENV PATH=/opt/conda/envs/arrow/bin:$PATH
 
 COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_azurite.sh
 
-# We want to install the GCS testbench using the same Python binary that the Conda code will use.
-COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
-RUN /arrow/ci/scripts/install_gcs_testbench.sh default
-
 COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
 RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
 
diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile
index 027fd589cecca..7e8dbe76f6248 100644
--- a/ci/docker/conda-python.dockerfile
+++ b/ci/docker/conda-python.dockerfile
@@ -32,11 +32,6 @@ RUN mamba install -q -y \
         nomkl && \
     mamba clean --all
 
-# XXX The GCS testbench was already installed in conda-cpp.dockerfile,
-# but we changed the installed Python version above, so we need to reinstall it.
-COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
-RUN /arrow/ci/scripts/install_gcs_testbench.sh default
-
 ENV ARROW_ACERO=ON \
     ARROW_BUILD_STATIC=OFF \
     ARROW_BUILD_TESTS=OFF \
diff --git a/ci/docker/python-wheel-windows-test-vs2019.dockerfile b/ci/docker/python-wheel-windows-test-vs2019.dockerfile
index 5f488a4c285ff..625ab25f848f2 100644
--- a/ci/docker/python-wheel-windows-test-vs2019.dockerfile
+++ b/ci/docker/python-wheel-windows-test-vs2019.dockerfile
@@ -35,16 +35,27 @@ RUN setx path "%path%;C:\Program Files\Git\usr\bin"
 RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
     rm -rf Python*
 
+# Install the GCS testbench using a well-known Python version.
+# NOTE: cannot use pipx's `--fetch-missing-python` because of
+# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves.
+RUN choco install -r -y --pre --no-progress python --version=3.11.9
+ENV PIPX_BIN_DIR=C:\\Windows\\
+ENV PIPX_PYTHON="C:\Python311\python.exe"
+COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/
+RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \
+    storage-testbench -h
+
 # Define the full version number otherwise choco falls back to patch number 0 (3.8 => 3.8.0)
 ARG python=3.8
-RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \
-    (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \
-    (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \
-    (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \
-    (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \
-    (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts")
+RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10") & \
+    (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13") & \
+    (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11") & \
+    (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9") & \
+    (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4") & \
+    (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1")
 
 # Install archiver to extract xz archives
-RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% & \
-    python -m pip install --no-cache-dir -U pip setuptools & \
+RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% && \
     choco install --no-progress -r -y archiver
+
+ENV PYTHON=$python
diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
index e17c0306f115d..4d867a448c994 100644
--- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
@@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
         libssl-dev \
         libcurl4-openssl-dev \
         python3-pip \
+        python3-venv \
         tzdata \
         wget && \
     apt-get clean && \
diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
index 341d8a87e8661..f26cad51f0983 100644
--- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
@@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
         libssl-dev \
         libcurl4-openssl-dev \
         python3-pip \
+        python3-venv \
         tzdata \
         wget && \
     apt-get clean && \
diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile
index a995ab2a8bc2d..125bc7ba46a81 100644
--- a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile
+++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile
@@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
         libssl-dev \
         libcurl4-openssl-dev \
         python3-pip \
+        python3-venv \
         tzdata \
         tzdata-legacy \
         wget && \
diff --git a/ci/scripts/install_gcs_testbench.bat b/ci/scripts/install_gcs_testbench.bat
index b03d0c2ad6608..f54f98db7cac8 100644
--- a/ci/scripts/install_gcs_testbench.bat
+++ b/ci/scripts/install_gcs_testbench.bat
@@ -17,9 +17,18 @@
 
 @echo on
 
-set GCS_TESTBENCH_VERSION="v0.36.0"
+set GCS_TESTBENCH_VERSION="v0.40.0"
+
+set PIPX_FLAGS=--verbose
+if NOT "%PIPX_PYTHON%"=="" (
+  set PIPX_FLAGS=--python %PIPX_PYTHON% %PIPX_FLAGS%
+)
+
+python -m pip install -U pipx || exit /B 1
 
 @REM Install GCS testbench %GCS_TESTBENCH_VERSION%
-python -m pip install  ^
+pipx install %PIPX_FLAGS% ^
         "https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^
         || exit /B 1
+
+pipx list --verbose
diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh
index 5471b3cc238ca..78826e94d3294 100755
--- a/ci/scripts/install_gcs_testbench.sh
+++ b/ci/scripts/install_gcs_testbench.sh
@@ -17,7 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -e
+set -ex
 
 if [ "$#" -ne 1 ]; then
   echo "Usage: $0 <storage-testbench version>"
@@ -34,19 +34,23 @@ case "$(uname -m)" in
     ;;
 esac
 
-# On newer pythons install into the system will fail, so override that
-export PIP_BREAK_SYSTEM_PACKAGES=1
-
 version=$1
 if [[ "${version}" -eq "default" ]]; then
   version="v0.39.0"
-  # Latests versions of Testbench require newer setuptools
-  python3 -m pip install --upgrade setuptools
 fi
 
+: ${PIPX_PYTHON:=$(which python3)}
+
+export PIP_BREAK_SYSTEM_PACKAGES=1
+${PIPX_PYTHON} -m pip install -U pipx
+
 # This script is run with PYTHON undefined in some places,
 # but those only use older pythons.
 if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then
-  python3 -m pip install \
-    "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
+  pipx_flags=--verbose
+  if [[ $(id -un) == "root" ]]; then
+    # Install globally as /root/.local/bin is typically not in $PATH
+    pipx_flags="${pipx_flags} --global"
+  fi
+  ${PIPX_PYTHON} -m pipx install ${pipx_flags} "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
 fi
diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat
index 87c0bb1252024..cac3f18434b6c 100755
--- a/ci/scripts/python_wheel_windows_test.bat
+++ b/ci/scripts/python_wheel_windows_test.bat
@@ -37,28 +37,32 @@ set PYARROW_TEST_TENSORFLOW=ON
 set ARROW_TEST_DATA=C:\arrow\testing\data
 set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data
 
-@REM Install testing dependencies
-pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1
+@REM List installed Pythons
+py -0p
+
+set PYTHON_CMD=py -%PYTHON%
 
-@REM Install GCS testbench
-call "C:\arrow\ci\scripts\install_gcs_testbench.bat"
+%PYTHON_CMD% -m pip install -U pip setuptools || exit /B 1
+
+@REM Install testing dependencies
+%PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1
 
 @REM Install the built wheels
-python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 
+%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1
 
 @REM Test that the modules are importable
-python -c "import pyarrow" || exit /B 1
-python -c "import pyarrow._gcsfs" || exit /B 1
-python -c "import pyarrow._hdfs" || exit /B 1 
-python -c "import pyarrow._s3fs" || exit /B 1
-python -c "import pyarrow.csv" || exit /B 1
-python -c "import pyarrow.dataset" || exit /B 1
-python -c "import pyarrow.flight" || exit /B 1
-python -c "import pyarrow.fs" || exit /B 1
-python -c "import pyarrow.json" || exit /B 1
-python -c "import pyarrow.orc" || exit /B 1
-python -c "import pyarrow.parquet" || exit /B 1
-python -c "import pyarrow.substrait" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow._gcsfs" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow._hdfs" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow._s3fs" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.csv" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.dataset" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.flight" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.fs" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.json" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.orc" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.parquet" || exit /B 1
+%PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1
 
 @rem Download IANA Timezone Database for ORC C++
 curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B
@@ -67,4 +71,4 @@ arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata
 set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo
 
 @REM Execute unittest
-pytest -r s --pyargs pyarrow || exit /B 1
+%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1
diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc
index a6022a8d21681..2098cf4d7f319 100644
--- a/cpp/src/arrow/filesystem/gcsfs_test.cc
+++ b/cpp/src/arrow/filesystem/gcsfs_test.cc
@@ -95,44 +95,41 @@ class GcsTestbench : public ::testing::Environment {
     if (const auto* env = std::getenv("PYTHON")) {
       names = {env};
     }
-    auto error = std::string(
-        "Could not start GCS emulator."
-        " Used the following list of python interpreter names:");
-    for (const auto& interpreter : names) {
-      auto exe_path = bp::search_path(interpreter);
-      error += " " + interpreter;
-      if (exe_path.empty()) {
-        error += " (exe not found)";
-        continue;
-      }
+    auto error = std::string("Could not start GCS emulator 'storage-testbench'");
 
-      bp::ipstream output;
-      server_process_ = bp::child(exe_path, "-m", "testbench", "--port", port_, group_,
-                                  bp::std_err > output);
+    auto testbench_is_running = [](bp::child& process, bp::ipstream& output) {
       // Wait for message: "* Restarting with"
-      auto testbench_is_running = [&output, this](bp::child& process) {
-        std::string line;
-        std::chrono::time_point<std::chrono::steady_clock> end =
-            std::chrono::steady_clock::now() + std::chrono::seconds(10);
-        while (server_process_.valid() && server_process_.running() &&
-               std::chrono::steady_clock::now() < end) {
-          if (output.peek() && std::getline(output, line)) {
-            std::cerr << line << std::endl;
-            if (line.find("* Restarting with") != std::string::npos) return true;
-          } else {
-            std::this_thread::sleep_for(std::chrono::milliseconds(20));
-          }
+      std::string line;
+      std::chrono::time_point<std::chrono::steady_clock> end =
+          std::chrono::steady_clock::now() + std::chrono::seconds(10);
+      while (process.valid() && process.running() &&
+             std::chrono::steady_clock::now() < end) {
+        if (output.peek() && std::getline(output, line)) {
+          std::cerr << line << std::endl;
+          if (line.find("* Restarting with") != std::string::npos) return true;
+        } else {
+          std::this_thread::sleep_for(std::chrono::milliseconds(20));
         }
-        return false;
-      };
+      }
+      return false;
+    };
 
-      if (testbench_is_running(server_process_)) break;
-      error += " (failed to start)";
-      server_process_.terminate();
-      server_process_.wait();
+    auto exe_path = bp::search_path("storage-testbench");
+    if (!exe_path.empty()) {
+      bp::ipstream output;
+      server_process_ =
+          bp::child(exe_path, "--port", port_, group_, bp::std_err > output);
+      if (!testbench_is_running(server_process_, output)) {
+        error += " (failed to start)";
+        server_process_.terminate();
+        server_process_.wait();
+      }
+    } else {
+      error += " (exe not found)";
+    }
+    if (!server_process_.valid()) {
+      error_ = std::move(error);
     }
-    if (server_process_.valid() && server_process_.valid()) return;
-    error_ = std::move(error);
   }
 
   bool running() { return server_process_.running(); }
@@ -140,7 +137,10 @@ class GcsTestbench : public ::testing::Environment {
   ~GcsTestbench() override {
     // Brutal shutdown, kill the full process group because the GCS testbench may launch
     // additional children.
-    group_.terminate();
+    try {
+      group_.terminate();
+    } catch (bp::process_error&) {
+    }
     if (server_process_.valid()) {
       server_process_.wait();
     }
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index e1919497b5116..7a222cec8a7c4 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -233,17 +233,16 @@ def minio_server_health_check(address):
 def gcs_server():
     port = find_free_port()
     env = os.environ.copy()
-    args = [sys.executable, '-m', 'testbench', '--port', str(port)]
+    exe = 'storage-testbench'
+    args = [exe, '--port', str(port)]
     proc = None
     try:
-        # check first if testbench module is available
-        import testbench  # noqa:F401
         # start server
         proc = subprocess.Popen(args, env=env)
         # Make sure the server is alive.
         if proc.poll() is not None:
             pytest.skip(f"Command {args} did not start server successfully!")
-    except (ModuleNotFoundError, OSError) as e:
+    except OSError as e:
         pytest.skip(f"Command {args} failed to execute: {e}")
     else:
         yield {
diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py
index 1a4b4a4e05614..53d3dd52bd8a6 100644
--- a/python/scripts/run_emscripten_tests.py
+++ b/python/scripts/run_emscripten_tests.py
@@ -335,7 +335,7 @@ def _load_pyarrow_in_runner(driver, wheel_name):
         """
 import pyarrow,pathlib
 pyarrow_dir = pathlib.Path(pyarrow.__file__).parent
-pytest.main([pyarrow_dir, '-v'])
+pytest.main([pyarrow_dir, '-r', 's'])
 """,
         wait_for_terminate=False,
     )
diff --git a/r/tests/testthat/test-gcs.R b/r/tests/testthat/test-gcs.R
index d671c12138c60..54159e82ca60f 100644
--- a/r/tests/testthat/test-gcs.R
+++ b/r/tests/testthat/test-gcs.R
@@ -116,12 +116,12 @@ test_that("GcsFileSystem$create() can read json_credentials", {
 })
 
 skip_on_cran()
-skip_if_not(system('python -c "import testbench"') == 0, message = "googleapis-storage-testbench is not installed.")
+skip_if_not(system("storage-testbench -h") == 0, message = "googleapis-storage-testbench is not installed.")
 library(dplyr)
 
 testbench_port <- Sys.getenv("TESTBENCH_PORT", "9001")
 
-pid_minio <- sys::exec_background("python", c("-m", "testbench", "--port", testbench_port),
+pid_minio <- sys::exec_background("storage-testbench", c("--port", testbench_port),
   std_out = FALSE,
   std_err = FALSE # TODO: is there a good place to send output?
 )

From 6c17b794509d3931225cf295ae864204162c786f Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 29 Aug 2024 17:53:14 +0900
Subject: [PATCH 34/63] GH-43877: [Ruby] Add support for 0 decimal value
 (#43882)

### Rationale for this change

Apache Arrow C++ may format a zero decimal value as a "0.EXXX" string such as "0.E-9". Ruby's BigDecimal doesn't accept that format.

### What changes are included in this PR?

We convert "0.EXXX" to "0.0EXXX" in Ruby.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.
* GitHub Issue: #43877

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ruby/red-arrow/lib/arrow/decimal128-array.rb | 4 +++-
 ruby/red-arrow/lib/arrow/decimal256-array.rb | 4 +++-
 ruby/red-arrow/test/test-decimal128-array.rb | 6 ++++++
 ruby/red-arrow/test/test-decimal256-array.rb | 6 ++++++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/ruby/red-arrow/lib/arrow/decimal128-array.rb b/ruby/red-arrow/lib/arrow/decimal128-array.rb
index a5ee53be7b229..528c878a859b5 100644
--- a/ruby/red-arrow/lib/arrow/decimal128-array.rb
+++ b/ruby/red-arrow/lib/arrow/decimal128-array.rb
@@ -18,7 +18,9 @@
 module Arrow
   class Decimal128Array
     def get_value(i)
-      BigDecimal(format_value(i))
+      string = format_value(i)
+      string.sub!(".E", ".0E") if string.include?(".E")
+      BigDecimal(string)
     end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/decimal256-array.rb b/ruby/red-arrow/lib/arrow/decimal256-array.rb
index 8c2306dfe3627..32841ca4862f5 100644
--- a/ruby/red-arrow/lib/arrow/decimal256-array.rb
+++ b/ruby/red-arrow/lib/arrow/decimal256-array.rb
@@ -19,7 +19,9 @@ module Arrow
   class Decimal256Array
     # @since 3.0.0
     def get_value(i)
-      BigDecimal(format_value(i))
+      string = format_value(i)
+      string.sub!(".E", ".0E") if string.include?(".E")
+      BigDecimal(string)
     end
   end
 end
diff --git a/ruby/red-arrow/test/test-decimal128-array.rb b/ruby/red-arrow/test/test-decimal128-array.rb
index a50e2cf4a4832..a6e7c4e1ac433 100644
--- a/ruby/red-arrow/test/test-decimal128-array.rb
+++ b/ruby/red-arrow/test/test-decimal128-array.rb
@@ -38,4 +38,10 @@ class Decimal128ArrayTest < Test::Unit::TestCase
                    array.to_a)
     end
   end
+
+  def test_zero
+    array = Arrow::Decimal128Array.new({precision: 38, scale: 9},
+                                       [BigDecimal("0")])
+    assert_equal(BigDecimal("0"), array[0])
+  end
 end
diff --git a/ruby/red-arrow/test/test-decimal256-array.rb b/ruby/red-arrow/test/test-decimal256-array.rb
index ed542f2d6c75e..053e948fc84b7 100644
--- a/ruby/red-arrow/test/test-decimal256-array.rb
+++ b/ruby/red-arrow/test/test-decimal256-array.rb
@@ -38,4 +38,10 @@ class Decimal256ArrayTest < Test::Unit::TestCase
                    array.to_a)
     end
   end
+
+  def test_zero
+    array = Arrow::Decimal256Array.new({precision: 38, scale: 9},
+                                       [BigDecimal("0")])
+    assert_equal(BigDecimal("0"), array[0])
+  end
 end

From 30893876e0650d9c3c003c5646f94c274ade9669 Mon Sep 17 00:00:00 2001
From: Rossi Sun <zanmato1984@gmail.com>
Date: Thu, 29 Aug 2024 19:09:52 +0800
Subject: [PATCH 35/63] GH-43870: [C++][Acero] Fix typos in join benchmark
 (#43871)

### Rationale for this change

These are rather obvious typos.

### What changes are included in this PR?

### Are these changes tested?

### Are there any user-facing changes?

* GitHub Issue: #43870

Authored-by: Ruoxi Sun <zanmato1984@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/acero/hash_join_benchmark.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc
index 470960b1c5062..e3e37e249e6a3 100644
--- a/cpp/src/arrow/acero/hash_join_benchmark.cc
+++ b/cpp/src/arrow/acero/hash_join_benchmark.cc
@@ -104,7 +104,7 @@ class JoinBenchmark {
       key_cmp.push_back(JoinKeyCmp::EQ);
     }
 
-    for (size_t i = 0; i < settings.build_payload_types.size(); i++) {
+    for (size_t i = 0; i < settings.probe_payload_types.size(); i++) {
       std::string name = "lp" + std::to_string(i);
       DCHECK_OK(l_schema_builder.AddField(field(name, settings.probe_payload_types[i])));
     }
@@ -279,7 +279,7 @@ static void BM_HashJoinBasic_MatchesPerRow(benchmark::State& st) {
   settings.cardinality = 1.0 / static_cast<double>(st.range(0));
 
   settings.num_build_batches = static_cast<int>(st.range(1));
-  settings.num_probe_batches = settings.num_probe_batches;
+  settings.num_probe_batches = settings.num_build_batches;
 
   HashJoinBasicBenchmarkImpl(st, settings);
 }
@@ -291,7 +291,7 @@ static void BM_HashJoinBasic_PayloadSize(benchmark::State& st) {
   settings.cardinality = 1.0 / static_cast<double>(st.range(1));
 
   settings.num_build_batches = static_cast<int>(st.range(2));
-  settings.num_probe_batches = settings.num_probe_batches;
+  settings.num_probe_batches = settings.num_build_batches;
 
   HashJoinBasicBenchmarkImpl(st, settings);
 }

From 6db12f2ca7cccb5f90e1cd0e753d5e92fe3b17bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= <raulcumplido@gmail.com>
Date: Thu, 29 Aug 2024 13:36:17 +0200
Subject: [PATCH 36/63] GH-41696: [Python][Packaging] Bump
 MACOSX_DEPLOYMENT_TARGET to 12 instead of 11 (#43137)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

As shown in the associated issue, there seems to be a problem with wheels built with `MACOSX_DEPLOYMENT_TARGET` 11.

### What changes are included in this PR?

Update `MACOSX_DEPLOYMENT_TARGET` everywhere to 12.0, the oldest macOS version that is still supported.

### Are these changes tested?

Via CI, even though the issue was not reproducible on CI.

### Are there any user-facing changes?

Yes, wheels won't be available for macOS 11 anymore, but those were already crashing in the previous release.
* GitHub Issue: #41696

Authored-by: Raúl Cumplido <raulcumplido@gmail.com>
Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com>
---
 .github/workflows/python.yml                 |  2 +-
 ci/scripts/python_wheel_macos_build.sh       |  2 +-
 ci/vcpkg/arm64-osx-static-debug.cmake        |  2 +-
 ci/vcpkg/arm64-osx-static-release.cmake      |  2 +-
 ci/vcpkg/universal2-osx-static-debug.cmake   |  2 +-
 ci/vcpkg/universal2-osx-static-release.cmake |  2 +-
 cpp/src/arrow/flight/CMakeLists.txt          |  6 ++++++
 dev/tasks/tasks.yml                          | 10 +++++-----
 dev/tasks/verify-rc/github.macos.yml         |  2 +-
 python/CMakeLists.txt                        |  2 +-
 ruby/red-arrow/ext/arrow/extconf.rb          |  2 +-
 11 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 916db2580e371..854d792f3100d 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -163,7 +163,7 @@ jobs:
       ARROW_BUILD_TESTS: OFF
       PYARROW_TEST_LARGE_MEMORY: ON
       # Current oldest supported version according to https://endoflife.date/macos
-      MACOSX_DEPLOYMENT_TARGET: 10.15
+      MACOSX_DEPLOYMENT_TARGET: 12.0
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@v4
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index d5430f26748eb..92b962f1740bd 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -34,7 +34,7 @@ rm -rf ${source_dir}/python/pyarrow/*.so.*
 
 echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ==="
 export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}"
-export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.15}
+export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-12.0}
 export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)}
 
 if [ $arch = "arm64" ]; then
diff --git a/ci/vcpkg/arm64-osx-static-debug.cmake b/ci/vcpkg/arm64-osx-static-debug.cmake
index f511819a2edd9..32ae7bc433489 100644
--- a/ci/vcpkg/arm64-osx-static-debug.cmake
+++ b/ci/vcpkg/arm64-osx-static-debug.cmake
@@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES arm64)
-set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0")
 
 set(VCPKG_BUILD_TYPE debug)
diff --git a/ci/vcpkg/arm64-osx-static-release.cmake b/ci/vcpkg/arm64-osx-static-release.cmake
index 43d65efb2651b..dde46cd763afe 100644
--- a/ci/vcpkg/arm64-osx-static-release.cmake
+++ b/ci/vcpkg/arm64-osx-static-release.cmake
@@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES arm64)
-set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0")
 
 set(VCPKG_BUILD_TYPE release)
diff --git a/ci/vcpkg/universal2-osx-static-debug.cmake b/ci/vcpkg/universal2-osx-static-debug.cmake
index 8abc1ebf838f1..d3ef0d67eb719 100644
--- a/ci/vcpkg/universal2-osx-static-debug.cmake
+++ b/ci/vcpkg/universal2-osx-static-debug.cmake
@@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64")
-set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0")
 
 set(VCPKG_BUILD_TYPE debug)
diff --git a/ci/vcpkg/universal2-osx-static-release.cmake b/ci/vcpkg/universal2-osx-static-release.cmake
index 2eb36c15175b2..3018aa93e5fbb 100644
--- a/ci/vcpkg/universal2-osx-static-release.cmake
+++ b/ci/vcpkg/universal2-osx-static-release.cmake
@@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64")
-set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0")
 
 set(VCPKG_BUILD_TYPE release)
diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt
index 98f93705f6f56..835c4fc83bf18 100644
--- a/cpp/src/arrow/flight/CMakeLists.txt
+++ b/cpp/src/arrow/flight/CMakeLists.txt
@@ -26,6 +26,12 @@ endif()
 if(WIN32)
   list(APPEND ARROW_FLIGHT_LINK_LIBS ws2_32.lib)
 endif()
+# Updating the MACOSX_DEPLOYMENT_TARGET to 12 required us to explicitly
+# link Flight with OpenSSL on macOS. Read this comment for more details:
+# https://github.com/apache/arrow/pull/43137#pullrequestreview-2267476893
+if(APPLE AND ARROW_USE_OPENSSL)
+  list(APPEND ARROW_FLIGHT_LINK_LIBS ${ARROW_OPENSSL_LIBS})
+endif()
 
 set(ARROW_FLIGHT_TEST_LINKAGE "${ARROW_TEST_LINKAGE}")
 if(Protobuf_USE_STATIC_LIBS)
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index cae34c3231381..7f52fe7b05232 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -413,7 +413,7 @@ tasks:
 
 {############################## Wheel macOS ####################################}
 
-{% for macos_version, macos_codename in [("10.15", "catalina")] %}
+{% for macos_version, macos_codename in [("12.0", "monterey")] %}
   {% set platform_tag = "macosx_{}_x86_64".format(macos_version.replace('.', '_')) %}
 
   wheel-macos-{{ macos_codename }}-{{ python_tag }}-amd64:
@@ -424,25 +424,25 @@ tasks:
       arrow_jemalloc: "ON"
       python_version: "{{ python_version }}"
       macos_deployment_target: "{{ macos_version }}"
-      runs_on: "macos-13"
+      runs_on: "macos-12"
       vcpkg_arch: "amd64"
     artifacts:
       - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-{{ platform_tag }}.whl
 
 {% endfor %}
 
-  wheel-macos-big-sur-{{ python_tag }}-arm64:
+  wheel-macos-monterey-{{ python_tag }}-arm64:
     ci: github
     template: python-wheels/github.osx.yml
     params:
       arch: "arm64"
       arrow_jemalloc: "OFF"
       python_version: "{{ python_version }}"
-      macos_deployment_target: "11.0"
+      macos_deployment_target: "12.0"
       runs_on: "macos-14"
       vcpkg_arch: "arm64"
     artifacts:
-      - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_11_0_arm64.whl
+      - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_12_0_arm64.whl
 
 {############################## Wheel Windows ################################}
 
diff --git a/dev/tasks/verify-rc/github.macos.yml b/dev/tasks/verify-rc/github.macos.yml
index 4bc3fff71b64a..e2bc7895c6d05 100644
--- a/dev/tasks/verify-rc/github.macos.yml
+++ b/dev/tasks/verify-rc/github.macos.yml
@@ -22,7 +22,7 @@
 {% set use_conda = use_conda|default(False) %}
 # env: is generated by macros.github_header()
   # Current oldest supported version according to https://endoflife.date/macos
-  MACOSX_DEPLOYMENT_TARGET: "10.15"
+  MACOSX_DEPLOYMENT_TARGET: "12.0"
 
 jobs:
   verify:
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 5d5eeaf8157b4..1a18b2b173acb 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -84,7 +84,7 @@ set(CMAKE_MACOSX_RPATH 1)
 if(DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
   set(CMAKE_OSX_DEPLOYMENT_TARGET $ENV{MACOSX_DEPLOYMENT_TARGET})
 else()
-  set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
+  set(CMAKE_OSX_DEPLOYMENT_TARGET 12.0)
 endif()
 
 # Generate a Clang compile_commands.json "compilation database" file for use
diff --git a/ruby/red-arrow/ext/arrow/extconf.rb b/ruby/red-arrow/ext/arrow/extconf.rb
index 28ccd0b2d59e1..a3005cd56f270 100644
--- a/ruby/red-arrow/ext/arrow/extconf.rb
+++ b/ruby/red-arrow/ext/arrow/extconf.rb
@@ -91,7 +91,7 @@
   symbols_in_external_bundles.each do |symbol|
     $DLDFLAGS << " -Wl,-U,#{symbol}"
   end
-  mmacosx_version_min = "-mmacosx-version-min=10.15"
+  mmacosx_version_min = "-mmacosx-version-min=12.0"
   $CFLAGS << " #{mmacosx_version_min}"
   $CXXFLAGS << " #{mmacosx_version_min}"
 end

From 45592f9e1d98da75a7bdc534375b32a004f13e02 Mon Sep 17 00:00:00 2001
From: Xin Hao <haoxinst@gmail.com>
Date: Thu, 29 Aug 2024 22:53:54 +0800
Subject: [PATCH 37/63] GH-43732: [Go] Require Go 1.22 or above (#43864)

### Rationale for this change

https://github.com/apache/arrow/issues/43732

### What changes are included in this PR?


### Are these changes tested?

### Are there any user-facing changes?

* GitHub Issue: #43732

Authored-by: Xin Hao <haoxinst@gmail.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
---
 .env                                    |  4 ++--
 .github/workflows/go.yml                | 22 +++++++++++-----------
 ci/docker/conda-integration.dockerfile  |  2 +-
 ci/docker/debian-12-go.dockerfile       |  4 ++--
 dev/release/verify-release-candidate.sh |  8 ++++----
 dev/tasks/tasks.yml                     |  2 +-
 go/arrow/compute/cast_test.go           |  2 +-
 go/arrow/scalar/parse.go                |  2 +-
 go/go.mod                               |  2 +-
 go/parquet/file/file_reader.go          |  2 +-
 go/parquet/schema/reflection.go         |  8 ++++----
 11 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/.env b/.env
index 21f904c3208f6..af647fc8b7a7f 100644
--- a/.env
+++ b/.env
@@ -58,8 +58,8 @@ CUDA=11.2.2
 DASK=latest
 DOTNET=8.0
 GCC_VERSION=""
-GO=1.21.8
-STATICCHECK=v0.4.7
+GO=1.22.6
+STATICCHECK=v0.5.1
 HDFS=3.2.1
 JDK=11
 KARTOTHEK=latest
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index ffd543691d5b2..b9a19d182d5c4 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -62,13 +62,13 @@ jobs:
           {
             "arch-label": "AMD64",
             "arch": "amd64",
-            "go": "1.21",
+            "go": "1.22",
             "runs-on": "ubuntu-latest"
           },
           {
             "arch-label": "AMD64",
             "arch": "amd64",
-            "go": "1.22",
+            "go": "1.23",
             "runs-on": "ubuntu-latest"
           }
           JSON
@@ -78,13 +78,13 @@ jobs:
           {
             "arch-label": "ARM64",
             "arch": "arm64v8",
-            "go": "1.21",
+            "go": "1.22",
             "runs-on": ["self-hosted", "arm", "linux"]
           },
           {
             "arch-label": "ARM64",
             "arch": "arm64v8",
-            "go": "1.22",
+            "go": "1.23",
             "runs-on": ["self-hosted", "arm", "linux"]
           }
           JSON
@@ -197,7 +197,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        go: ['1.21', '1.22']
+        go: ['1.22', '1.23']
     env:
       GO: ${{ matrix.go }}
     steps:
@@ -238,7 +238,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        go: ['1.21', '1.22']
+        go: ['1.22', '1.23']
     env:
       GO: ${{ matrix.go }}
     steps:
@@ -277,7 +277,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        go: ['1.21', '1.22']
+        go: ['1.22', '1.23']
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
@@ -310,7 +310,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        go: ['1.21', '1.22']
+        go: ['1.22', '1.23']
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
@@ -323,7 +323,7 @@ jobs:
           go-version: ${{ matrix.go }}
           cache: true
           cache-dependency-path: go/go.sum
-      - name: Install staticcheck      
+      - name: Install staticcheck
         run: |
           . .env
           go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK}
@@ -368,7 +368,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        go: ['1.21', '1.22']
+        go: ['1.22', '1.23']
     env:
       ARROW_GO_TESTCGO: "1"
     steps:
@@ -439,7 +439,7 @@ jobs:
           ci/scripts/msys2_setup.sh cgo
       - name: Get required Go version
         run: |
-          (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV  
+          (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV
       - name: Update CGO Env vars
         shell: msys2 {0}
         run: |
diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile
index c602490d6b729..7ad2e5c0e8008 100644
--- a/ci/docker/conda-integration.dockerfile
+++ b/ci/docker/conda-integration.dockerfile
@@ -24,7 +24,7 @@ ARG maven=3.8.7
 ARG node=16
 ARG yarn=1.22
 ARG jdk=11
-ARG go=1.21.8
+ARG go=1.22.6
 
 # Install Archery and integration dependencies
 COPY ci/conda_env_archery.txt /arrow/ci/
diff --git a/ci/docker/debian-12-go.dockerfile b/ci/docker/debian-12-go.dockerfile
index c958e6bdee211..4bc683c109eb8 100644
--- a/ci/docker/debian-12-go.dockerfile
+++ b/ci/docker/debian-12-go.dockerfile
@@ -16,8 +16,8 @@
 # under the License.
 
 ARG arch=amd64
-ARG go=1.21
-ARG staticcheck=v0.4.7
+ARG go=1.22
+ARG staticcheck=v0.5.1
 FROM ${arch}/golang:${go}-bookworm
 
 # FROM collects all the args, get back the staticcheck version arg
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 07e765a759ea0..cdea4ca0d00a1 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -24,7 +24,7 @@
 # - JDK >= 11
 # - gcc >= 4.8
 # - Node.js >= 18
-# - Go >= 1.21
+# - Go >= 1.22
 # - Docker
 #
 # If using a non-system Boost, set BOOST_ROOT and add Boost libraries to
@@ -403,7 +403,7 @@ install_go() {
     return 0
   fi
 
-  local version=1.21.8
+  local version=1.22.6
   show_info "Installing go version ${version}..."
 
   local arch="$(uname -m)"
@@ -512,7 +512,7 @@ install_maven() {
     show_info "System Maven version ${SYSTEM_MAVEN_VERSION} matches required Maven version ${MAVEN_VERSION}. Skipping installation."
   else
     # Append pipe character to make preview release versions like "X.Y.Z-beta-1" sort
-    # as older than their corresponding release version "X.Y.Z". This works because 
+    # as older than their corresponding release version "X.Y.Z". This works because
     # `sort -V` orders the pipe character lower than any version number character.
     older_version=$(printf '%s\n%s\n' "$SYSTEM_MAVEN_VERSION" "$MAVEN_VERSION" | sed 's/$/|/' | sort -V | sed 's/|$//' | head -n1)
     if [[ "$older_version" == "$SYSTEM_MAVEN_VERSION" ]]; then
@@ -953,7 +953,7 @@ test_go() {
   show_header "Build and test Go libraries"
 
   maybe_setup_go
-  maybe_setup_conda compilers go=1.21
+  maybe_setup_conda compilers go=1.22
 
   pushd go
   go get -v ./...
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 7f52fe7b05232..c6d2f2175d44c 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1475,7 +1475,7 @@ tasks:
         R_PRUNE_DEPS: TRUE
       image: r-clang-sanitizer
 
-  {% for go_version, staticcheck in [("1.21", "v0.4.7"), ("1.22", "latest")] %}
+  {% for go_version, staticcheck in [("1.22", "v0.5.1"), ("1.23", "latest")] %}
   test-debian-12-go-{{ go_version }}:
     ci: github
     template: docker-tests/github.linux.yml
diff --git a/go/arrow/compute/cast_test.go b/go/arrow/compute/cast_test.go
index fa08467dd3946..db6098225dda8 100644
--- a/go/arrow/compute/cast_test.go
+++ b/go/arrow/compute/cast_test.go
@@ -129,7 +129,7 @@ func checkScalarWithScalars(t *testing.T, funcName string, inputs []scalar.Scala
 			fmt.Fprintf(&b, " (types differed: %s vs %s)",
 				out.(*compute.ScalarDatum).Type(), expected.DataType())
 		}
-		t.Fatalf(b.String())
+		t.Fatal(b.String())
 	}
 }
 
diff --git a/go/arrow/scalar/parse.go b/go/arrow/scalar/parse.go
index 866e627113d88..27db42afa69b1 100644
--- a/go/arrow/scalar/parse.go
+++ b/go/arrow/scalar/parse.go
@@ -329,7 +329,7 @@ func fromListScalar(s ListScalar, v reflect.Value) error {
 		}
 	case *array.Map:
 		// only implementing slice of metadata for now
-		if v.Type().Elem() != reflect.PtrTo(reflect.TypeOf(arrow.Metadata{})) {
+		if v.Type().Elem() != reflect.PointerTo(reflect.TypeOf(arrow.Metadata{})) {
 			return fmt.Errorf("unimplemented fromListScalar type %s to %s", arr.DataType(), v.Type().String())
 		}
 
diff --git a/go/go.mod b/go/go.mod
index a995eee24d563..77f98cefb0f0e 100644
--- a/go/go.mod
+++ b/go/go.mod
@@ -16,7 +16,7 @@
 
 module github.com/apache/arrow/go/v18
 
-go 1.21
+go 1.22
 
 require (
 	github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c
diff --git a/go/parquet/file/file_reader.go b/go/parquet/file/file_reader.go
index f838482fbb0e9..f25b882e00647 100644
--- a/go/parquet/file/file_reader.go
+++ b/go/parquet/file/file_reader.go
@@ -233,7 +233,7 @@ func (f *Reader) parseMetaData() error {
 func (f *Reader) handleAadPrefix(fileDecrypt *parquet.FileDecryptionProperties, algo *parquet.Algorithm) (string, error) {
 	aadPrefixInProps := fileDecrypt.AadPrefix()
 	aadPrefix := []byte(aadPrefixInProps)
-	fileHasAadPrefix := algo.Aad.AadPrefix != nil && len(algo.Aad.AadPrefix) > 0
+	fileHasAadPrefix := len(algo.Aad.AadPrefix) > 0
 	aadPrefixInFile := algo.Aad.AadPrefix
 
 	if algo.Aad.SupplyAadPrefix && aadPrefixInProps == "" {
diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go
index 0bec9eb599dc8..51d0a84f2244f 100644
--- a/go/parquet/schema/reflection.go
+++ b/go/parquet/schema/reflection.go
@@ -639,7 +639,7 @@ func typeFromNode(n Node) reflect.Type {
 		}
 
 		if n.RepetitionType() == parquet.Repetitions.Optional {
-			typ = reflect.PtrTo(typ)
+			typ = reflect.PointerTo(typ)
 		} else if n.RepetitionType() == parquet.Repetitions.Repeated {
 			typ = reflect.SliceOf(typ)
 		}
@@ -707,7 +707,7 @@ func typeFromNode(n Node) reflect.Type {
 				elemType = reflect.SliceOf(elemType)
 			}
 			if gnode.RepetitionType() == parquet.Repetitions.Optional {
-				elemType = reflect.PtrTo(elemType)
+				elemType = reflect.PointerTo(elemType)
 			}
 			return elemType
 		case ConvertedTypes.Map, ConvertedTypes.MapKeyValue:
@@ -778,7 +778,7 @@ func typeFromNode(n Node) reflect.Type {
 
 			mapType := reflect.MapOf(keyType, valType)
 			if gnode.RepetitionType() == parquet.Repetitions.Optional {
-				mapType = reflect.PtrTo(mapType)
+				mapType = reflect.PointerTo(mapType)
 			}
 			return mapType
 		default:
@@ -796,7 +796,7 @@ func typeFromNode(n Node) reflect.Type {
 				return reflect.SliceOf(structType)
 			}
 			if gnode.RepetitionType() == parquet.Repetitions.Optional {
-				return reflect.PtrTo(structType)
+				return reflect.PointerTo(structType)
 			}
 			return structType
 		}

From 4f91c8f144125bd147c25cb49ac0071c8d28765c Mon Sep 17 00:00:00 2001
From: mwish <maplewish117@gmail.com>
Date: Thu, 29 Aug 2024 23:38:41 +0800
Subject: [PATCH 38/63] GH-43759: [C++] Acero: Minor code enhancement for Join
 (#43760)

### Rationale for this change

Minor style enhancement for join

### What changes are included in this PR?

Minor style enhancement for join

### Are these changes tested?

Covered by existing tests.

### Are there any user-facing changes?

no

* GitHub Issue: #43759

Authored-by: mwish <maplewish117@gmail.com>
Signed-off-by: mwish <maplewish117@gmail.com>
---
 cpp/src/arrow/acero/hash_join_dict.cc         |  9 ++-
 cpp/src/arrow/acero/hash_join_node.cc         | 16 ++---
 cpp/src/arrow/acero/hash_join_node.h          |  6 +-
 cpp/src/arrow/acero/swiss_join.cc             |  7 +-
 cpp/src/arrow/compute/light_array_internal.cc | 68 +++++++++----------
 cpp/src/arrow/compute/light_array_internal.h  |  6 +-
 cpp/src/arrow/compute/light_array_test.cc     |  4 +-
 7 files changed, 57 insertions(+), 59 deletions(-)

diff --git a/cpp/src/arrow/acero/hash_join_dict.cc b/cpp/src/arrow/acero/hash_join_dict.cc
index 3aef08e6e9ccf..8db9dddb2c3a0 100644
--- a/cpp/src/arrow/acero/hash_join_dict.cc
+++ b/cpp/src/arrow/acero/hash_join_dict.cc
@@ -225,21 +225,20 @@ Status HashJoinDictBuild::Init(ExecContext* ctx, std::shared_ptr<Array> dictiona
     return Status::OK();
   }
 
-  dictionary_ = dictionary;
+  dictionary_ = std::move(dictionary);
 
   // Initialize encoder
   RowEncoder encoder;
-  std::vector<TypeHolder> encoder_types;
-  encoder_types.emplace_back(value_type_);
+  std::vector<TypeHolder> encoder_types{value_type_};
   encoder.Init(encoder_types, ctx);
 
   // Encode all dictionary values
-  int64_t length = dictionary->data()->length;
+  int64_t length = dictionary_->data()->length;
   if (length >= std::numeric_limits<int32_t>::max()) {
     return Status::Invalid(
         "Dictionary length in hash join must fit into signed 32-bit integer.");
   }
-  RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary->data()}, length)));
+  RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary_->data()}, length)));
 
   std::vector<int32_t> entries_to_take;
 
diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc
index 67f902e64be93..80dd163ced740 100644
--- a/cpp/src/arrow/acero/hash_join_node.cc
+++ b/cpp/src/arrow/acero/hash_join_node.cc
@@ -61,30 +61,30 @@ Result<std::vector<FieldRef>> HashJoinSchema::ComputePayload(
     const std::vector<FieldRef>& filter, const std::vector<FieldRef>& keys) {
   // payload = (output + filter) - keys, with no duplicates
   std::unordered_set<int> payload_fields;
-  for (auto ref : output) {
+  for (const auto& ref : output) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.insert(match[0]);
   }
 
-  for (auto ref : filter) {
+  for (const auto& ref : filter) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.insert(match[0]);
   }
 
-  for (auto ref : keys) {
+  for (const auto& ref : keys) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.erase(match[0]);
   }
 
   std::vector<FieldRef> payload_refs;
-  for (auto ref : output) {
+  for (const auto& ref : output) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     if (payload_fields.find(match[0]) != payload_fields.end()) {
       payload_refs.push_back(ref);
       payload_fields.erase(match[0]);
     }
   }
-  for (auto ref : filter) {
+  for (const auto& ref : filter) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     if (payload_fields.find(match[0]) != payload_fields.end()) {
       payload_refs.push_back(ref);
@@ -198,7 +198,7 @@ Status HashJoinSchema::ValidateSchemas(JoinType join_type, const Schema& left_sc
     return Status::Invalid("Different number of key fields on left (", left_keys.size(),
                            ") and right (", right_keys.size(), ") side of the join");
   }
-  if (left_keys.size() < 1) {
+  if (left_keys.empty()) {
     return Status::Invalid("Join key cannot be empty");
   }
   for (size_t i = 0; i < left_keys.size() + right_keys.size(); ++i) {
@@ -432,7 +432,7 @@ Status HashJoinSchema::CollectFilterColumns(std::vector<FieldRef>& left_filter,
         indices[0] -= left_schema.num_fields();
         FieldPath corrected_path(std::move(indices));
         if (right_seen_paths.find(*path) == right_seen_paths.end()) {
-          right_filter.push_back(corrected_path);
+          right_filter.emplace_back(corrected_path);
           right_seen_paths.emplace(std::move(corrected_path));
         }
       } else if (left_seen_paths.find(*path) == left_seen_paths.end()) {
@@ -698,7 +698,7 @@ class HashJoinNode : public ExecNode, public TracedNode {
                std::shared_ptr<Schema> output_schema,
                std::unique_ptr<HashJoinSchema> schema_mgr, Expression filter,
                std::unique_ptr<HashJoinImpl> impl)
-      : ExecNode(plan, inputs, {"left", "right"},
+      : ExecNode(plan, std::move(inputs), {"left", "right"},
                  /*output_schema=*/std::move(output_schema)),
         TracedNode(this),
         join_type_(join_options.join_type),
diff --git a/cpp/src/arrow/acero/hash_join_node.h b/cpp/src/arrow/acero/hash_join_node.h
index ad60019ceabc4..19745b8675cf0 100644
--- a/cpp/src/arrow/acero/hash_join_node.h
+++ b/cpp/src/arrow/acero/hash_join_node.h
@@ -65,9 +65,9 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
   std::shared_ptr<Schema> MakeOutputSchema(const std::string& left_field_name_suffix,
                                            const std::string& right_field_name_suffix);
 
-  bool LeftPayloadIsEmpty() { return PayloadIsEmpty(0); }
+  bool LeftPayloadIsEmpty() const { return PayloadIsEmpty(0); }
 
-  bool RightPayloadIsEmpty() { return PayloadIsEmpty(1); }
+  bool RightPayloadIsEmpty() const { return PayloadIsEmpty(1); }
 
   static int kMissingField() {
     return SchemaProjectionMaps<HashJoinProjection>::kMissingField;
@@ -88,7 +88,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
                                             const SchemaProjectionMap& right_to_filter,
                                             const Expression& filter);
 
-  bool PayloadIsEmpty(int side) {
+  bool PayloadIsEmpty(int side) const {
     assert(side == 0 || side == 1);
     return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
   }
diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc
index 4d0c8187ac6e2..6c783110af571 100644
--- a/cpp/src/arrow/acero/swiss_join.cc
+++ b/cpp/src/arrow/acero/swiss_join.cc
@@ -1667,7 +1667,7 @@ Result<std::shared_ptr<ArrayData>> JoinResultMaterialize::FlushBuildColumn(
     const std::shared_ptr<DataType>& data_type, const RowArray* row_array, int column_id,
     uint32_t* row_ids) {
   ResizableArrayData output;
-  output.Init(data_type, pool_, bit_util::Log2(num_rows_));
+  RETURN_NOT_OK(output.Init(data_type, pool_, bit_util::Log2(num_rows_)));
 
   for (size_t i = 0; i <= null_ranges_.size(); ++i) {
     int row_id_begin =
@@ -2247,8 +2247,9 @@ Result<ExecBatch> JoinResidualFilter::MaterializeFilterInput(
         build_schemas_->map(HashJoinProjection::FILTER, HashJoinProjection::PAYLOAD);
     for (int i = 0; i < num_build_cols; ++i) {
       ResizableArrayData column_data;
-      column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i), pool_,
-                       bit_util::Log2(num_batch_rows));
+      RETURN_NOT_OK(
+          column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i),
+                           pool_, bit_util::Log2(num_batch_rows)));
       if (auto idx = to_key.get(i); idx != SchemaProjectionMap::kMissingField) {
         RETURN_NOT_OK(build_keys_->DecodeSelected(&column_data, idx, num_batch_rows,
                                                   key_ids_maybe_null, pool_));
diff --git a/cpp/src/arrow/compute/light_array_internal.cc b/cpp/src/arrow/compute/light_array_internal.cc
index 4f235925d0fb6..e4b1f1b8cdd63 100644
--- a/cpp/src/arrow/compute/light_array_internal.cc
+++ b/cpp/src/arrow/compute/light_array_internal.cc
@@ -118,10 +118,9 @@ Result<KeyColumnMetadata> ColumnMetadataFromDataType(
     const std::shared_ptr<DataType>& type) {
   const bool is_extension = type->id() == Type::EXTENSION;
   const std::shared_ptr<DataType>& typ =
-      is_extension
-          ? arrow::internal::checked_pointer_cast<ExtensionType>(type->GetSharedPtr())
-                ->storage_type()
-          : type;
+      is_extension ? arrow::internal::checked_cast<const ExtensionType*>(type.get())
+                         ->storage_type()
+                   : type;
 
   if (typ->id() == Type::DICTIONARY) {
     auto bit_width =
@@ -205,22 +204,25 @@ Status ColumnArraysFromExecBatch(const ExecBatch& batch,
                                    column_arrays);
 }
 
-void ResizableArrayData::Init(const std::shared_ptr<DataType>& data_type,
-                              MemoryPool* pool, int log_num_rows_min) {
+Status ResizableArrayData::Init(const std::shared_ptr<DataType>& data_type,
+                                MemoryPool* pool, int log_num_rows_min) {
 #ifndef NDEBUG
   if (num_rows_allocated_ > 0) {
-    ARROW_DCHECK(data_type_ != NULLPTR);
-    KeyColumnMetadata metadata_before =
-        ColumnMetadataFromDataType(data_type_).ValueOrDie();
-    KeyColumnMetadata metadata_after = ColumnMetadataFromDataType(data_type).ValueOrDie();
+    ARROW_DCHECK(data_type_ != nullptr);
+    const KeyColumnMetadata& metadata_before = column_metadata_;
+    ARROW_ASSIGN_OR_RAISE(KeyColumnMetadata metadata_after,
+                          ColumnMetadataFromDataType(data_type));
     ARROW_DCHECK(metadata_before.is_fixed_length == metadata_after.is_fixed_length &&
                  metadata_before.fixed_length == metadata_after.fixed_length);
   }
 #endif
+  ARROW_DCHECK(data_type != nullptr);
+  ARROW_ASSIGN_OR_RAISE(column_metadata_, ColumnMetadataFromDataType(data_type));
   Clear(/*release_buffers=*/false);
   log_num_rows_min_ = log_num_rows_min;
   data_type_ = data_type;
   pool_ = pool;
+  return Status::OK();
 }
 
 void ResizableArrayData::Clear(bool release_buffers) {
@@ -246,8 +248,6 @@ Status ResizableArrayData::ResizeFixedLengthBuffers(int num_rows_new) {
     num_rows_allocated_new *= 2;
   }
 
-  KeyColumnMetadata column_metadata = ColumnMetadataFromDataType(data_type_).ValueOrDie();
-
   if (buffers_[kFixedLengthBuffer] == NULLPTR) {
     ARROW_DCHECK(buffers_[kValidityBuffer] == NULLPTR &&
                  buffers_[kVariableLengthBuffer] == NULLPTR);
@@ -258,8 +258,8 @@ Status ResizableArrayData::ResizeFixedLengthBuffers(int num_rows_new) {
             bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes, pool_));
     memset(mutable_data(kValidityBuffer), 0,
            bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes);
-    if (column_metadata.is_fixed_length) {
-      if (column_metadata.fixed_length == 0) {
+    if (column_metadata_.is_fixed_length) {
+      if (column_metadata_.fixed_length == 0) {
         ARROW_ASSIGN_OR_RAISE(
             buffers_[kFixedLengthBuffer],
             AllocateResizableBuffer(
@@ -271,7 +271,7 @@ Status ResizableArrayData::ResizeFixedLengthBuffers(int num_rows_new) {
         ARROW_ASSIGN_OR_RAISE(
             buffers_[kFixedLengthBuffer],
             AllocateResizableBuffer(
-                num_rows_allocated_new * column_metadata.fixed_length + kNumPaddingBytes,
+                num_rows_allocated_new * column_metadata_.fixed_length + kNumPaddingBytes,
                 pool_));
       }
     } else {
@@ -300,15 +300,15 @@ Status ResizableArrayData::ResizeFixedLengthBuffers(int num_rows_new) {
     memset(mutable_data(kValidityBuffer) + bytes_for_bits_before, 0,
            bytes_for_bits_after - bytes_for_bits_before);
 
-    if (column_metadata.is_fixed_length) {
-      if (column_metadata.fixed_length == 0) {
+    if (column_metadata_.is_fixed_length) {
+      if (column_metadata_.fixed_length == 0) {
         RETURN_NOT_OK(buffers_[kFixedLengthBuffer]->Resize(
             bit_util::BytesForBits(num_rows_allocated_new) + kNumPaddingBytes));
         memset(mutable_data(kFixedLengthBuffer) + bytes_for_bits_before, 0,
                bytes_for_bits_after - bytes_for_bits_before);
       } else {
         RETURN_NOT_OK(buffers_[kFixedLengthBuffer]->Resize(
-            num_rows_allocated_new * column_metadata.fixed_length + kNumPaddingBytes));
+            num_rows_allocated_new * column_metadata_.fixed_length + kNumPaddingBytes));
       }
     } else {
       RETURN_NOT_OK(buffers_[kFixedLengthBuffer]->Resize(
@@ -323,10 +323,7 @@ Status ResizableArrayData::ResizeFixedLengthBuffers(int num_rows_new) {
 }
 
 Status ResizableArrayData::ResizeVaryingLengthBuffer() {
-  KeyColumnMetadata column_metadata;
-  column_metadata = ColumnMetadataFromDataType(data_type_).ValueOrDie();
-
-  if (!column_metadata.is_fixed_length) {
+  if (!column_metadata_.is_fixed_length) {
     int64_t min_new_size = buffers_[kFixedLengthBuffer]->data_as<int32_t>()[num_rows_];
     ARROW_DCHECK(var_len_buf_size_ > 0);
     if (var_len_buf_size_ < min_new_size) {
@@ -343,23 +340,19 @@ Status ResizableArrayData::ResizeVaryingLengthBuffer() {
 }
 
 KeyColumnArray ResizableArrayData::column_array() const {
-  KeyColumnMetadata column_metadata;
-  column_metadata = ColumnMetadataFromDataType(data_type_).ValueOrDie();
-  return KeyColumnArray(column_metadata, num_rows_,
+  return KeyColumnArray(column_metadata_, num_rows_,
                         buffers_[kValidityBuffer]->mutable_data(),
                         buffers_[kFixedLengthBuffer]->mutable_data(),
                         buffers_[kVariableLengthBuffer]->mutable_data());
 }
 
 std::shared_ptr<ArrayData> ResizableArrayData::array_data() const {
-  KeyColumnMetadata column_metadata;
-  column_metadata = ColumnMetadataFromDataType(data_type_).ValueOrDie();
-
-  auto valid_count = arrow::internal::CountSetBits(
-      buffers_[kValidityBuffer]->data(), /*offset=*/0, static_cast<int64_t>(num_rows_));
+  auto valid_count =
+      arrow::internal::CountSetBits(buffers_[kValidityBuffer]->data(), /*bit_offset=*/0,
+                                    static_cast<int64_t>(num_rows_));
   int null_count = static_cast<int>(num_rows_) - static_cast<int>(valid_count);
 
-  if (column_metadata.is_fixed_length) {
+  if (column_metadata_.is_fixed_length) {
     return ArrayData::Make(data_type_, num_rows_,
                            {buffers_[kValidityBuffer], buffers_[kFixedLengthBuffer]},
                            null_count);
@@ -493,10 +486,12 @@ Status ExecBatchBuilder::AppendSelected(const std::shared_ptr<ArrayData>& source
   ARROW_DCHECK(num_rows_before >= 0);
   int num_rows_after = num_rows_before + num_rows_to_append;
   if (target->num_rows() == 0) {
-    target->Init(source->type, pool, kLogNumRows);
+    RETURN_NOT_OK(target->Init(source->type, pool, kLogNumRows));
   }
   RETURN_NOT_OK(target->ResizeFixedLengthBuffers(num_rows_after));
 
+  // Since target->Init is called before, we can assume that the ColumnMetadata
+  // would never fail to be created
   KeyColumnMetadata column_metadata =
       ColumnMetadataFromDataType(source->type).ValueOrDie();
 
@@ -647,11 +642,12 @@ Status ExecBatchBuilder::AppendNulls(const std::shared_ptr<DataType>& type,
   int num_rows_before = target.num_rows();
   int num_rows_after = num_rows_before + num_rows_to_append;
   if (target.num_rows() == 0) {
-    target.Init(type, pool, kLogNumRows);
+    RETURN_NOT_OK(target.Init(type, pool, kLogNumRows));
   }
   RETURN_NOT_OK(target.ResizeFixedLengthBuffers(num_rows_after));
 
-  KeyColumnMetadata column_metadata = ColumnMetadataFromDataType(type).ValueOrDie();
+  ARROW_ASSIGN_OR_RAISE(KeyColumnMetadata column_metadata,
+                        ColumnMetadataFromDataType(type));
 
   // Process fixed length buffer
   //
@@ -708,7 +704,7 @@ Status ExecBatchBuilder::AppendSelected(MemoryPool* pool, const ExecBatch& batch
       const Datum& data = batch.values[col_ids ? col_ids[i] : i];
       ARROW_DCHECK(data.is_array());
       const std::shared_ptr<ArrayData>& array_data = data.array();
-      values_[i].Init(array_data->type, pool, kLogNumRows);
+      RETURN_NOT_OK(values_[i].Init(array_data->type, pool, kLogNumRows));
     }
   }
 
@@ -739,7 +735,7 @@ Status ExecBatchBuilder::AppendNulls(MemoryPool* pool,
   if (values_.empty()) {
     values_.resize(types.size());
     for (size_t i = 0; i < types.size(); ++i) {
-      values_[i].Init(types[i], pool, kLogNumRows);
+      RETURN_NOT_OK(values_[i].Init(types[i], pool, kLogNumRows));
     }
   }
 
diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h
index 995c4211998e0..b8e48f096baeb 100644
--- a/cpp/src/arrow/compute/light_array_internal.h
+++ b/cpp/src/arrow/compute/light_array_internal.h
@@ -295,8 +295,8 @@ class ARROW_EXPORT ResizableArrayData {
   /// \param pool The pool to make allocations on
   /// \param log_num_rows_min All resize operations will allocate at least enough
   ///                         space for (1 << log_num_rows_min) rows
-  void Init(const std::shared_ptr<DataType>& data_type, MemoryPool* pool,
-            int log_num_rows_min);
+  Status Init(const std::shared_ptr<DataType>& data_type, MemoryPool* pool,
+              int log_num_rows_min);
 
   /// \brief Resets the array back to an empty state
   /// \param release_buffers If true then allocated memory is released and the
@@ -351,6 +351,8 @@ class ARROW_EXPORT ResizableArrayData {
   static constexpr int64_t kNumPaddingBytes = 64;
   int log_num_rows_min_;
   std::shared_ptr<DataType> data_type_;
+  // Would be valid if data_type_ != NULLPTR.
+  KeyColumnMetadata column_metadata_{};
   MemoryPool* pool_;
   int num_rows_;
   int num_rows_allocated_;
diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc
index cc02d489d138f..98a1ab8b7acae 100644
--- a/cpp/src/arrow/compute/light_array_test.cc
+++ b/cpp/src/arrow/compute/light_array_test.cc
@@ -295,7 +295,7 @@ TEST(ResizableArrayData, Basic) {
         arrow::internal::checked_pointer_cast<FixedWidthType>(type)->bit_width() / 8;
     {
       ResizableArrayData array;
-      array.Init(type, pool.get(), /*log_num_rows_min=*/16);
+      ASSERT_OK(array.Init(type, pool.get(), /*log_num_rows_min=*/16));
       ASSERT_EQ(0, array.num_rows());
       ASSERT_OK(array.ResizeFixedLengthBuffers(2));
       ASSERT_EQ(2, array.num_rows());
@@ -330,7 +330,7 @@ TEST(ResizableArrayData, Binary) {
     ARROW_SCOPED_TRACE("Type: ", type->ToString());
     {
       ResizableArrayData array;
-      array.Init(type, pool.get(), /*log_num_rows_min=*/4);
+      ASSERT_OK(array.Init(type, pool.get(), /*log_num_rows_min=*/4));
       ASSERT_EQ(0, array.num_rows());
       ASSERT_OK(array.ResizeFixedLengthBuffers(2));
       ASSERT_EQ(2, array.num_rows());

From 6b242538cf5723da5735814af9a18d0a9b41d5a4 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <pitrou@free.fr>
Date: Thu, 29 Aug 2024 21:14:39 +0200
Subject: [PATCH 39/63] GH-43885: [C++][CI] Catch potential integer overflow in
 PoolBuffer (#43886)

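This should fix https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=71200 by returning an out-of-memory error, instead of overflowing, when rounding a very large requested capacity up to a multiple of 64.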

* GitHub Issue: #43885

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/memory_pool.cc | 11 +++++++++--
 testing                      |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 1e855311a98ed..34207781277d1 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -858,7 +858,7 @@ class PoolBuffer final : public ResizableBuffer {
     }
     uint8_t* ptr = mutable_data();
     if (!ptr || capacity > capacity_) {
-      int64_t new_capacity = bit_util::RoundUpToMultipleOf64(capacity);
+      ARROW_ASSIGN_OR_RAISE(int64_t new_capacity, RoundCapacity(capacity));
       if (ptr) {
         RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, alignment_, &ptr));
       } else {
@@ -878,7 +878,7 @@ class PoolBuffer final : public ResizableBuffer {
     if (ptr && shrink_to_fit && new_size <= size_) {
       // Buffer is non-null and is not growing, so shrink to the requested size without
       // excess space.
-      int64_t new_capacity = bit_util::RoundUpToMultipleOf64(new_size);
+      ARROW_ASSIGN_OR_RAISE(int64_t new_capacity, RoundCapacity(new_size));
       if (capacity_ != new_capacity) {
         // Buffer hasn't got yet the requested size.
         RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, alignment_, &ptr));
@@ -916,6 +916,13 @@ class PoolBuffer final : public ResizableBuffer {
   }
 
  private:
+  static Result<int64_t> RoundCapacity(int64_t capacity) {
+    if (capacity > std::numeric_limits<int64_t>::max() - 63) {
+      return Status::OutOfMemory("capacity too large");
+    }
+    return bit_util::RoundUpToMultipleOf64(capacity);
+  }
+
   MemoryPool* pool_;
   int64_t alignment_;
 };
diff --git a/testing b/testing
index 735ae7128d571..4d209492d514c 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 735ae7128d571398dd798d7ff004adebeb342883
+Subproject commit 4d209492d514c2d3cb2d392681b9aa00e6d8da1c

From 07420b0c56066326bd409e9537ee3d43ab6b1a51 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon <vibhatha@users.noreply.github.com>
Date: Fri, 30 Aug 2024 07:50:53 +0530
Subject: [PATCH 40/63] GH-43869: [Java][CI] Flight related failure in the
 AMD64 Windows Server 2022 Java JDK 11 CI (#43850)

### Rationale for this change

CI jobs have been consistently failing on Windows recently due to an issue with the Derby configuration. This PR investigates a solution.

### What changes are included in this PR?

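Change the exception-handling flow and the returned state: `removeDerbyDatabaseIfExists` now returns `true` when the database does not exist (`NoSuchFileException`) and `false` on any other failure, and the constructor checks the clear and populate steps separately.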

### Are these changes tested?

Via existing test cases.

### Are there any user-facing changes?

No
* GitHub Issue: #43869

Authored-by: Vibhatha Lakmal Abeykoon <vibhatha@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../flight/sql/example/FlightSqlExample.java  | 26 +++++++++----------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java
index e7127faf97539..67bfc85c48602 100644
--- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java
+++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java
@@ -181,9 +181,8 @@ public static void main(String[] args) throws Exception {
 
   public FlightSqlExample(final Location location, final String dbName) {
     // TODO Constructor should not be doing work.
-    checkState(
-        removeDerbyDatabaseIfExists(dbName) && populateDerbyDatabase(dbName),
-        "Failed to reset Derby database!");
+    checkState(removeDerbyDatabaseIfExists(dbName), "Failed to clear Derby database!");
+    checkState(populateDerbyDatabase(dbName), "Failed to populate Derby database!");
     databaseUri = "jdbc:derby:target/" + dbName;
     final ConnectionFactory connectionFactory =
         new DriverManagerConnectionFactory(databaseUri, new Properties());
@@ -253,36 +252,35 @@ public FlightSqlExample(final Location location, final String dbName) {
   }
 
   public static boolean removeDerbyDatabaseIfExists(final String dbName) {
-    boolean wasSuccess;
     final Path path = Paths.get("target" + File.separator + dbName);
 
     try (final Stream<Path> walk = Files.walk(path)) {
       /*
        * Iterate over all paths to delete, mapping each path to the outcome of its own
-       * deletion as a boolean representing whether or not each individual operation was
-       * successful; then reduce all booleans into a single answer, and store that into
-       * `wasSuccess`, which will later be returned by this method.
+       * deletion as a boolean representing whether each individual operation was
+       * successful; then reduce all booleans into a single answer.
        * If for whatever reason the resulting `Stream<Boolean>` is empty, throw an `IOException`;
        * this not expected.
        */
-      wasSuccess =
+      boolean unused =
           walk.sorted(Comparator.reverseOrder())
               .map(Path::toFile)
               .map(File::delete)
               .reduce(Boolean::logicalAnd)
               .orElseThrow(IOException::new);
-    } catch (IOException e) {
+    } catch (NoSuchFileException e) {
       /*
        * The only acceptable scenario for an `IOException` to be thrown here is if
        * an attempt to delete an non-existing file takes place -- which should be
        * alright, since they would be deleted anyway.
        */
-      if (!(wasSuccess = e instanceof NoSuchFileException)) {
-        LOGGER.error(format("Failed attempt to clear DerbyDB: <%s>", e.getMessage()), e);
-      }
+      LOGGER.error(format("No existing Derby database to delete.: <%s>", e.getMessage()), e);
+      return true;
+    } catch (Exception e) {
+      LOGGER.error(format("Failed attempt to clear DerbyDB.: <%s>", e.getMessage()), e);
+      return false;
     }
-
-    return wasSuccess;
+    return true;
   }
 
   private static boolean populateDerbyDatabase(final String dbName) {

From 63b34c97c5d3ca6d20dacb9e92b404986f1d7d62 Mon Sep 17 00:00:00 2001
From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:00:50 -0400
Subject: [PATCH 41/63] GH-43837: [Go][IPC] Consolidate StreamWriter and
 FileWriter, ensuring that EOS indicator is written in file (#43890)

### Rationale for this change

Fixes: #43837

Much of the logic was implemented separately in the IPC stream writer and the file writer. This PR changes the file writer so that it uses a stream writer internally, ensuring that a valid stream is embedded within the file.

**TODO**
- [x] Remove @ bkietz's commits

### What changes are included in this PR?

- Refactor `fileWriter` to embed `streamWriter` and defer relevant methods
- Add test

### Are these changes tested?

Yes

### Are there any user-facing changes?

Go-generated IPC files will now contain the end-of-stream (EOS) indicator before the footer.

* GitHub Issue: #43837

Authored-by: Joel Lubinitsky <joellubi@gmail.com>
Signed-off-by: Joel Lubinitsky <joellubi@gmail.com>
---
 go/arrow/ipc/file_test.go   | 40 ++++++++++++++++++
 go/arrow/ipc/file_writer.go | 82 +++++++++----------------------------
 go/arrow/ipc/writer.go      | 12 +++---
 3 files changed, 65 insertions(+), 69 deletions(-)

diff --git a/go/arrow/ipc/file_test.go b/go/arrow/ipc/file_test.go
index dea63579cfea6..b9a4547a5126a 100644
--- a/go/arrow/ipc/file_test.go
+++ b/go/arrow/ipc/file_test.go
@@ -17,13 +17,17 @@
 package ipc_test
 
 import (
+	"bytes"
 	"fmt"
 	"os"
 	"testing"
 
+	"github.com/apache/arrow/go/v18/arrow/array"
 	"github.com/apache/arrow/go/v18/arrow/internal/arrdata"
 	"github.com/apache/arrow/go/v18/arrow/internal/flatbuf"
+	"github.com/apache/arrow/go/v18/arrow/ipc"
 	"github.com/apache/arrow/go/v18/arrow/memory"
+	"github.com/stretchr/testify/require"
 )
 
 func TestFile(t *testing.T) {
@@ -75,3 +79,39 @@ func TestFileCompressed(t *testing.T) {
 		}
 	}
 }
+
+func TestFileEmbedsStream(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	recs := arrdata.Records["primitives"]
+	schema := recs[0].Schema()
+
+	var buf bytes.Buffer
+	w, err := ipc.NewFileWriter(&buf, ipc.WithSchema(schema), ipc.WithAllocator(mem))
+	require.NoError(t, err)
+	defer w.Close()
+
+	for _, rec := range recs {
+		require.NoError(t, w.Write(rec))
+	}
+
+	require.NoError(t, w.Close())
+
+	// we should be able to read a valid ipc stream within the ipc file
+
+	// create an ipc stream reader, skipping the file magic+padding bytes
+	rdr, err := ipc.NewReader(bytes.NewReader(buf.Bytes()[8:]), ipc.WithSchema(schema), ipc.WithAllocator(mem))
+	require.NoError(t, err)
+	defer rdr.Release()
+
+	// the stream reader should know to stop before the footer if the EOS indicator is properly written
+	var i int
+	for rdr.Next() {
+		rec := rdr.Record()
+		require.Truef(t, array.RecordEqual(rec, recs[i]), "records[%d] differ", i)
+		i++
+	}
+
+	require.NoError(t, rdr.Err())
+}
diff --git a/go/arrow/ipc/file_writer.go b/go/arrow/ipc/file_writer.go
index 8582c81baf2fe..9a3d7d3dbeb02 100644
--- a/go/arrow/ipc/file_writer.go
+++ b/go/arrow/ipc/file_writer.go
@@ -37,23 +37,17 @@ type PayloadWriter interface {
 	Close() error
 }
 
-type pwriter struct {
-	w   io.WriteSeeker
-	pos int64
+type fileWriter struct {
+	streamWriter
 
 	schema *arrow.Schema
 	dicts  []fileBlock
 	recs   []fileBlock
 }
 
-func (w *pwriter) Start() error {
+func (w *fileWriter) Start() error {
 	var err error
 
-	err = w.updatePos()
-	if err != nil {
-		return fmt.Errorf("arrow/ipc: could not update position while in start: %w", err)
-	}
-
 	// only necessary to align to 8-byte boundary at the start of the file
 	_, err = w.Write(Magic)
 	if err != nil {
@@ -65,10 +59,10 @@ func (w *pwriter) Start() error {
 		return fmt.Errorf("arrow/ipc: could not align start block: %w", err)
 	}
 
-	return err
+	return w.streamWriter.Start()
 }
 
-func (w *pwriter) WritePayload(p Payload) error {
+func (w *fileWriter) WritePayload(p Payload) error {
 	blk := fileBlock{Offset: w.pos, Meta: 0, Body: p.size}
 	n, err := writeIPCPayload(w, p)
 	if err != nil {
@@ -77,11 +71,6 @@ func (w *pwriter) WritePayload(p Payload) error {
 
 	blk.Meta = int32(n)
 
-	err = w.updatePos()
-	if err != nil {
-		return fmt.Errorf("arrow/ipc: could not update position while in write-payload: %w", err)
-	}
-
 	switch flatbuf.MessageHeader(p.msg) {
 	case flatbuf.MessageHeaderDictionaryBatch:
 		w.dicts = append(w.dicts, blk)
@@ -92,27 +81,18 @@ func (w *pwriter) WritePayload(p Payload) error {
 	return nil
 }
 
-func (w *pwriter) Close() error {
+func (w *fileWriter) Close() error {
 	var err error
 
-	// write file footer
-	err = w.updatePos()
-	if err != nil {
-		return fmt.Errorf("arrow/ipc: could not update position while in close: %w", err)
+	if err = w.streamWriter.Close(); err != nil {
+		return err
 	}
 
 	pos := w.pos
-	err = writeFileFooter(w.schema, w.dicts, w.recs, w)
-	if err != nil {
+	if err = writeFileFooter(w.schema, w.dicts, w.recs, w); err != nil {
 		return fmt.Errorf("arrow/ipc: could not write file footer: %w", err)
 	}
 
-	// write file footer length
-	err = w.updatePos() // not strictly needed as we passed w to writeFileFooter...
-	if err != nil {
-		return fmt.Errorf("arrow/ipc: could not compute file footer length: %w", err)
-	}
-
 	size := w.pos - pos
 	if size <= 0 {
 		return fmt.Errorf("arrow/ipc: invalid file footer size (size=%d)", size)
@@ -133,13 +113,7 @@ func (w *pwriter) Close() error {
 	return nil
 }
 
-func (w *pwriter) updatePos() error {
-	var err error
-	w.pos, err = w.w.Seek(0, io.SeekCurrent)
-	return err
-}
-
-func (w *pwriter) align(align int32) error {
+func (w *fileWriter) align(align int32) error {
 	remainder := paddedLength(w.pos, align) - w.pos
 	if remainder == 0 {
 		return nil
@@ -149,12 +123,6 @@ func (w *pwriter) align(align int32) error {
 	return err
 }
 
-func (w *pwriter) Write(p []byte) (int, error) {
-	n, err := w.w.Write(p)
-	w.pos += int64(n)
-	return n, err
-}
-
 func writeIPCPayload(w io.Writer, p Payload) (int, error) {
 	n, err := writeMessage(p.meta, kArrowIPCAlignment, w)
 	if err != nil {
@@ -259,18 +227,12 @@ func (ps payloads) Release() {
 
 // FileWriter is an Arrow file writer.
 type FileWriter struct {
-	w io.WriteSeeker
+	w io.Writer
 
 	mem memory.Allocator
 
-	header struct {
-		started bool
-		offset  int64
-	}
-
-	footer struct {
-		written bool
-	}
+	headerStarted bool
+	footerWritten bool
 
 	pw PayloadWriter
 
@@ -289,7 +251,7 @@ type FileWriter struct {
 }
 
 // NewFileWriter opens an Arrow file using the provided writer w.
-func NewFileWriter(w io.WriteSeeker, opts ...Option) (*FileWriter, error) {
+func NewFileWriter(w io.Writer, opts ...Option) (*FileWriter, error) {
 	var (
 		cfg = newConfig(opts...)
 		err error
@@ -297,7 +259,7 @@ func NewFileWriter(w io.WriteSeeker, opts ...Option) (*FileWriter, error) {
 
 	f := FileWriter{
 		w:               w,
-		pw:              &pwriter{w: w, schema: cfg.schema, pos: -1},
+		pw:              &fileWriter{streamWriter: streamWriter{w: w}, schema: cfg.schema},
 		mem:             cfg.alloc,
 		schema:          cfg.schema,
 		codec:           cfg.codec,
@@ -306,12 +268,6 @@ func NewFileWriter(w io.WriteSeeker, opts ...Option) (*FileWriter, error) {
 		compressors:     make([]compressor, cfg.compressNP),
 	}
 
-	pos, err := f.w.Seek(0, io.SeekCurrent)
-	if err != nil {
-		return nil, fmt.Errorf("arrow/ipc: could not seek current position: %w", err)
-	}
-	f.header.offset = pos
-
 	return &f, err
 }
 
@@ -321,7 +277,7 @@ func (f *FileWriter) Close() error {
 		return fmt.Errorf("arrow/ipc: could not write empty file: %w", err)
 	}
 
-	if f.footer.written {
+	if f.footerWritten {
 		return nil
 	}
 
@@ -329,7 +285,7 @@ func (f *FileWriter) Close() error {
 	if err != nil {
 		return fmt.Errorf("arrow/ipc: could not close payload writer: %w", err)
 	}
-	f.footer.written = true
+	f.footerWritten = true
 
 	return nil
 }
@@ -367,14 +323,14 @@ func (f *FileWriter) Write(rec arrow.Record) error {
 }
 
 func (f *FileWriter) checkStarted() error {
-	if !f.header.started {
+	if !f.headerStarted {
 		return f.start()
 	}
 	return nil
 }
 
 func (f *FileWriter) start() error {
-	f.header.started = true
+	f.headerStarted = true
 	err := f.pw.Start()
 	if err != nil {
 		return err
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index 02c67635bb2fd..5a280fbf84a1f 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -37,18 +37,18 @@ import (
 	"github.com/apache/arrow/go/v18/internal/utils"
 )
 
-type swriter struct {
+type streamWriter struct {
 	w   io.Writer
 	pos int64
 }
 
-func (w *swriter) Start() error { return nil }
-func (w *swriter) Close() error {
+func (w *streamWriter) Start() error { return nil }
+func (w *streamWriter) Close() error {
 	_, err := w.Write(kEOS[:])
 	return err
 }
 
-func (w *swriter) WritePayload(p Payload) error {
+func (w *streamWriter) WritePayload(p Payload) error {
 	_, err := writeIPCPayload(w, p)
 	if err != nil {
 		return err
@@ -56,7 +56,7 @@ func (w *swriter) WritePayload(p Payload) error {
 	return nil
 }
 
-func (w *swriter) Write(p []byte) (int, error) {
+func (w *streamWriter) Write(p []byte) (int, error) {
 	n, err := w.w.Write(p)
 	w.pos += int64(n)
 	return n, err
@@ -118,7 +118,7 @@ func NewWriter(w io.Writer, opts ...Option) *Writer {
 	return &Writer{
 		w:              w,
 		mem:            cfg.alloc,
-		pw:             &swriter{w: w},
+		pw:             &streamWriter{w: w},
 		schema:         cfg.schema,
 		codec:          cfg.codec,
 		emitDictDeltas: cfg.emitDictDeltas,

From 3b310bbf5cc6fb55052dd28107235ca4c734cacf Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 05:31:18 +0900
Subject: [PATCH 42/63] MINOR: [JS] Bump @swc/helpers from 0.5.11 to 0.5.12 in
 /js (#43901)

Bumps [@ swc/helpers](https://github.com/swc-project/swc) from 0.5.11 to 0.5.12.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/swc-project/swc/commits">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ swc/helpers&package-manager=npm_and_yarn&previous-version=0.5.11&new-version=0.5.12)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/yarn.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/js/yarn.lock b/js/yarn.lock
index dc1fc99a0ecf4..b4e208b4a61a3 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -1201,9 +1201,9 @@
   integrity sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==
 
 "@swc/helpers@^0.5.11":
-  version "0.5.11"
-  resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.11.tgz#5bab8c660a6e23c13b2d23fcd1ee44a2db1b0cb7"
-  integrity sha512-YNlnKRWF2sVojTpIyzwou9XoTNbzbzONwRhOoniEioF1AtaitTvVZblaQRrAzChWQ1bLYyYSWzM18y4WwgzJ+A==
+  version "0.5.12"
+  resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.12.tgz#37aaca95284019eb5d2207101249435659709f4b"
+  integrity sha512-KMZNXiGibsW9kvZAO1Pam2JPTDBm+KSHMMHWdsyI/1DbIZjT2A6Gy3hblVXUMEDvUAKq+e0vL0X0o54owWji7g==
   dependencies:
     tslib "^2.4.0"
 

From 1ecfb31b7dcebc486f404bc0ed74c1cf644bb51b Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Sun, 1 Sep 2024 19:17:42 -0400
Subject: [PATCH 43/63] GH-43665: [R] Remove references to bindings vignette
 (#43889)

### Rationale for this change

The writing-bindings vignette was removed in
https://github.com/apache/arrow/pull/41576#issuecomment-2134327019. It
turns out there were more references to it throughout the docs that I
failed to remove.

### What changes are included in this PR?

Deleting cross-references to the vignette, which no longer exists.

### Are these changes tested?

Not really

### Are there any user-facing changes?

The docs won't point you at links that 404.
* GitHub Issue: #43665
---
 docs/source/developers/guide/resources.rst    |  1 -
 .../guide/step_by_step/arrow_codebase.rst     |  3 --
 .../developers/guide/tutorials/r_tutorial.rst | 28 -------------------
 r/vignettes/developing.Rmd                    |  3 --
 4 files changed, 35 deletions(-)

diff --git a/docs/source/developers/guide/resources.rst b/docs/source/developers/guide/resources.rst
index b5905af65499b..5b598ab1296ac 100644
--- a/docs/source/developers/guide/resources.rst
+++ b/docs/source/developers/guide/resources.rst
@@ -71,7 +71,6 @@ Contributing
 
 - :ref:`contributing`
 - `Arrow R Developer Guide <https://arrow.apache.org/docs/r/articles/developing.html>`_
-- `Writing Bindings article for R package <https://arrow.apache.org/docs/r/articles/developers/bindings.html>`_.
 
 Reproducible examples:
 
diff --git a/docs/source/developers/guide/step_by_step/arrow_codebase.rst b/docs/source/developers/guide/step_by_step/arrow_codebase.rst
index 0c194ab3a3f70..c4ea61d89ff80 100644
--- a/docs/source/developers/guide/step_by_step/arrow_codebase.rst
+++ b/docs/source/developers/guide/step_by_step/arrow_codebase.rst
@@ -150,6 +150,3 @@ C++ we must create the binding manually to use it in that implementation.
       When writing bindings between C++ compute functions and R functions,
       the aim is to expose the C++ functionality via the same interface as
       existing R functions.
-
-      To read the full content on the topic of R bindings read through the
-      `Writing Bindings article <https://arrow.apache.org/docs/r/articles/developers/bindings.html>`_.
diff --git a/docs/source/developers/guide/tutorials/r_tutorial.rst b/docs/source/developers/guide/tutorials/r_tutorial.rst
index 62d5cfcbc76c2..3fba873bff0a9 100644
--- a/docs/source/developers/guide/tutorials/r_tutorial.rst
+++ b/docs/source/developers/guide/tutorials/r_tutorial.rst
@@ -27,22 +27,6 @@ R tutorials
 ***********
 
 
-Writing Bindings Walkthrough
-============================
-
-The first R package tutorial to be included in the New Contributor's
-guide is a **Walkthrough** added in the **Writing Bindings**
-vignette. With time we will try to include additional tutorials
-directly into this guide.
-
-This tutorial will show how to do a binding of a C++ function
-`starts_with() <https://arrow.apache.org/docs/cpp/compute.html#containment-tests>`_
-to the (base) R function ``startsWith()``.
-
-To view the tutorial follow the
-`Walkthrough section of the Writing Bindings article <https://arrow.apache.org/docs/r/articles/developers/bindings.html#walkthrough>`_.
-
-
 R tutorial on adding a lubridate binding
 ========================================
 
@@ -56,11 +40,6 @@ The binding will be added to the ``expression.R`` file in the
 R package. But you can also follow these steps in case you are
 adding a binding that will live somewhere else.
 
-.. seealso::
-
-   To read more about the philosophy behind R bindings, refer to the
-   `Writing Bindings article <https://arrow.apache.org/docs/r/articles/developers/bindings.html>`_.
-
 This tutorial is different from the :ref:`step_by_step` as we
 will be working on a specific case. This tutorial is not meant
 as a step-by-step guide.
@@ -170,13 +149,6 @@ equivalent data types. lubridate's ``mday()`` function has no additional
 arguments and there are also no option classes associated with Arrow C++
 function ``day()``.
 
-.. note::
-
-   To see what to do if there is an option class associated with the
-   function you are binding, refer to
-   `Examining the C++ function <https://arrow.apache.org/docs/r/articles/developers/bindings.html#examining-the-c-function>`_ from the Writing Bindings
-   article.
-
 Looking at the code in ``expressions.R`` we can see the day function
 is already specified/mapped on the R package side:
 `<https://github.com/apache/arrow/blob/658bec37aa5cbdd53b5e4cdc81b8ba3962e67f11/r/R/expression.R#L63-L64>`_
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index 248a80292a029..147f9cc028d78 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -52,6 +52,3 @@ There are a number of ways in which we do this:
 * [Running R with the C++ debugger attached](https://arrow.apache.org/docs/r/articles/developers/debugging.html)
 * [In-depth guide to how the package installation works](https://arrow.apache.org/docs/r/articles/developers/install_details.html)
 * [Using Docker to diagnose a bug or test a feature on a specific OS](https://arrow.apache.org/docs/r/articles/developers/docker.html)
-* [Writing bindings between R functions and Arrow Acero functions](https://arrow.apache.org/docs/r/articles/developers/bindings.html)
-
-

From f919da13ec0250ecf9ddf8f57dbd17b22830fa21 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 09:21:49 +0900
Subject: [PATCH 44/63] MINOR: [JS] Bump ix from 6.0.0 to 7.0.0 in /js (#43898)

Bumps [ix](https://github.com/ReactiveX/IxJS) from 6.0.0 to 7.0.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/ReactiveX/IxJS/releases">ix's releases</a>.</em></p>
<blockquote>
<h2>v7.0.0</h2>
<h3>Bug Fixes</h3>
<ul>
<li><strong>actions:</strong> fix docs workflow (<a href="https://github.com/ReactiveX/IxJS/commit/512e370f428a970f1f55dbf3a0dcd528e3bbc0ba">512e370</a>)</li>
<li><strong>changelog:</strong> include latest changelog in npm packages (<a href="https://github.com/ReactiveX/IxJS/commit/f25687b46a547b510184ff6558884628e37063b5">f25687b</a>)</li>
</ul>
<h3>chore</h3>
<ul>
<li><strong>build:</strong> fix gulp async task completion (<a href="https://github.com/ReactiveX/IxJS/commit/c68e97c91d565baf33de989d3f6190a8b5046adc">c68e97c</a>)</li>
<li><strong>release:</strong> 7.0.0 (<a href="https://github.com/ReactiveX/IxJS/commit/b8890f1010347ac2242398801d1405e51a4d3396">b8890f1</a>)</li>
</ul>
<h3>Documentation</h3>
<ul>
<li>
<p><strong>CHANGELOG:</strong> 7.0.0 (<a href="https://github.com/ReactiveX/IxJS/commit/28a24f945dc2f5ba05669399713b6e2299ebb28c">28a24f9</a>)</p>
</li>
<li>
<p>Fix exports (<a href="https://redirect.github.com/ReactiveX/IxJS/issues/371">#371</a>) (<a href="https://github.com/ReactiveX/IxJS/commit/d461eae02cc63bafc4e3256d5d59541d1ff7e43f">d461eae</a>), closes <a href="https://redirect.github.com/ReactiveX/IxJS/issues/371">#371</a></p>
</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/ReactiveX/IxJS/blob/master/CHANGELOG.md">ix's changelog</a>.</em></p>
<blockquote>
<h1><a href="https://github.com/ReactiveX/IxJS/compare/v6.0.0...v7.0.0">7.0.0</a> (2024-07-10)</h1>
<h3>Bug Fixes</h3>
<ul>
<li><strong>actions:</strong> fix docs workflow (<a href="https://github.com/ReactiveX/IxJS/commit/512e370f428a970f1f55dbf3a0dcd528e3bbc0ba">512e370</a>)</li>
<li><strong>changelog:</strong> include latest changelog in npm packages (<a href="https://github.com/ReactiveX/IxJS/commit/f25687b46a547b510184ff6558884628e37063b5">f25687b</a>)</li>
</ul>
<h3>chore</h3>
<ul>
<li>
<p><strong>build:</strong> fix gulp async task completion (<a href="https://github.com/ReactiveX/IxJS/commit/c68e97c91d565baf33de989d3f6190a8b5046adc">c68e97c</a>)</p>
</li>
<li>
<p>Fix exports (<a href="https://redirect.github.com/ReactiveX/IxJS/issues/371">#371</a>) (<a href="https://github.com/ReactiveX/IxJS/commit/d461eae02cc63bafc4e3256d5d59541d1ff7e43f">d461eae</a>), closes <a href="https://redirect.github.com/ReactiveX/IxJS/issues/371">#371</a></p>
</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/ReactiveX/IxJS/commit/b8890f1010347ac2242398801d1405e51a4d3396"><code>b8890f1</code></a> chore(release): 7.0.0</li>
<li><a href="https://github.com/ReactiveX/IxJS/commit/28a24f945dc2f5ba05669399713b6e2299ebb28c"><code>28a24f9</code></a> docs(CHANGELOG): 7.0.0</li>
<li><a href="https://github.com/ReactiveX/IxJS/commit/f25687b46a547b510184ff6558884628e37063b5"><code>f25687b</code></a> fix(changelog): include latest changelog in npm packages</li>
<li><a href="https://github.com/ReactiveX/IxJS/commit/c68e97c91d565baf33de989d3f6190a8b5046adc"><code>c68e97c</code></a> chore(build): fix gulp async task completion</li>
<li><a href="https://github.com/ReactiveX/IxJS/commit/512e370f428a970f1f55dbf3a0dcd528e3bbc0ba"><code>512e370</code></a> fix(actions): fix docs workflow</li>
<li><a href="https://github.com/ReactiveX/IxJS/commit/d461eae02cc63bafc4e3256d5d59541d1ff7e43f"><code>d461eae</code></a> Fix exports (<a href="https://redirect.github.com/ReactiveX/IxJS/issues/371">#371</a>)</li>
<li>See full diff in <a href="https://github.com/ReactiveX/IxJS/compare/v6.0.0...v7.0.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=ix&package-manager=npm_and_yarn&previous-version=6.0.0&new-version=7.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/package.json | 2 +-
 js/yarn.lock    | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/js/package.json b/js/package.json
index cbf0670e018b6..1688747d24290 100644
--- a/js/package.json
+++ b/js/package.json
@@ -95,7 +95,7 @@
     "gulp-terser": "2.1.0",
     "gulp-typescript": "5.0.1",
     "gulp-vinyl-size": "1.1.4",
-    "ix": "6.0.0",
+    "ix": "7.0.0",
     "jest": "29.7.0",
     "jest-silent-reporter": "0.6.0",
     "memfs": "4.9.2",
diff --git a/js/yarn.lock b/js/yarn.lock
index b4e208b4a61a3..d1a089501a388 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -4381,10 +4381,10 @@ istextorbinary@^3.0.0:
     binaryextensions "^2.2.0"
     textextensions "^3.2.0"
 
-ix@6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/ix/-/ix-6.0.0.tgz#c1875523f8090c7146dc3ac3412a763663887f27"
-  integrity sha512-B/KeYkHtOWbr3ttckqWT9uha2ixw9fGVDxX+DwVXhO+P5eOhyCadt+aC30hRBvG+do+tbI3xbYDMYN6dp1C4Vw==
+ix@7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/ix/-/ix-7.0.0.tgz#df4c9a242614178f0836aa3cd1965441fae301d1"
+  integrity sha512-hgVnphYh+ytIEsmjeym5wP2GPaM3+RZf7zCrZXE7gjwwmpIBEg0t6GRX7BbdXzTosXCstEAzdPxpyplGBYnIbw==
   dependencies:
     "@types/node" ">=13.7.4"
     tslib "^2.6.2"

From 2ffb186cf0b1f226188d5ddc88f038e0504b97ea Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 09:22:19 +0900
Subject: [PATCH 45/63] MINOR: [JS] Bump @typescript-eslint/eslint-plugin from
 7.12.0 to 7.18.0 in /js (#43900)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [@ typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 7.12.0 to 7.18.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/typescript-eslint/typescript-eslint/releases"><code>@​typescript-eslint/eslint-plugin</code>'s releases</a>.</em></p>
<blockquote>
<h2>v7.18.0</h2>
<h2>7.18.0 (2024-07-29)</h2>
<h3>🚀 Features</h3>
<ul>
<li><strong>types:</strong> update ECMA versions (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9634">#9634</a>)</li>
</ul>
<h3>🩹 Fixes</h3>
<ul>
<li><strong>eslint-plugin:</strong> [no-unnecessary-type-assertion] prevent runtime error when asserting a variable declared in default TS lib (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9660">#9660</a>)</li>
<li><strong>eslint-plugin:</strong> [unbound-method] report on destructuring in function parameters (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/8952">#8952</a>)</li>
<li><strong>eslint-plugin:</strong> [no-duplicate-type-constituents] shouldn't report on error types (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9600">#9600</a>)</li>
<li><strong>eslint-plugin:</strong> [strict-boolean-expressions] support branded booleans (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9297">#9297</a>)</li>
</ul>
<h3>❤️  Thank You</h3>
<ul>
<li>auvred <a href="https://github.com/auvred"><code>@​auvred</code></a></li>
<li>Oliver Salzburg</li>
<li>Vinccool96</li>
<li>Yukihiro Hasegawa <a href="https://github.com/y-hsgw"><code>@​y-hsgw</code></a></li>
</ul>
<p>You can read about our <a href="https://main--typescript-eslint.netlify.app/users/versioning">versioning strategy</a> and <a href="https://main--typescript-eslint.netlify.app/users/releases">releases</a> on our website.</p>
<h2>v7.17.0</h2>
<h2>7.17.0 (2024-07-22)</h2>
<h3>🚀 Features</h3>
<ul>
<li><strong>eslint-plugin:</strong> backport no-unsafe-function type, no-wrapper-object-types from v8 to v7 (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9507">#9507</a>)</li>
<li><strong>eslint-plugin:</strong> [return-await] add option to report in error-handling scenarios only, and deprecate &quot;never&quot; (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9364">#9364</a>)</li>
</ul>
<h3>🩹 Fixes</h3>
<ul>
<li><strong>eslint-plugin:</strong> [no-floating-promises] check top-level type assertions (and more) (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9043">#9043</a>)</li>
<li><strong>eslint-plugin:</strong> [strict-boolean-expressions] consider assertion function argument a boolean context (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9074">#9074</a>)</li>
<li><strong>eslint-plugin:</strong> [no-unnecessary-condition] false positive on optional private field (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9602">#9602</a>)</li>
<li><strong>typescript-estree:</strong> don't infer single-run when --fix is in proces.argv (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9577">#9577</a>)</li>
<li><strong>typescript-estree:</strong> disable single-run inference with extraFileExtensions (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9580">#9580</a>)</li>
<li><strong>website:</strong> expose ATA types to eslint instance (<a href="https://redirect.github.com/typescript-eslint/typescript-eslint/pull/9598">#9598</a>)</li>
</ul>
<h3>❤️  Thank You</h3>
<ul>
<li>Armano <a href="https://github.com/armano2"><code>@​armano2</code></a></li>
<li>Josh Goldberg ✨</li>
<li>Kirk Waiblinger <a href="https://github.com/kirkwaiblinger"><code>@​kirkwaiblinger</code></a></li>
<li>StyleShit <a href="https://github.com/StyleShit"><code>@​StyleShit</code></a></li>
</ul>
<p>You can read about our <a href="https://main--typescript-eslint.netlify.app/users/versioning">versioning strategy</a> and <a href="https://main--typescript-eslint.netlify.app/users/releases">releases</a> on our website.</p>

</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/eslint-plugin/CHANGELOG.md"><code>@​typescript-eslint/eslint-plugin</code>'s changelog</a>.</em></p>
<blockquote>
<h2>7.18.0 (2024-07-29)</h2>
<h3>🩹 Fixes</h3>
<ul>
<li>
<p><strong>eslint-plugin:</strong> [no-unnecessary-type-assertion] prevent runtime error when asserting a variable declared in default TS lib</p>
</li>
<li>
<p><strong>eslint-plugin:</strong> [unbound-method] report on destructuring in function parameters</p>
</li>
<li>
<p><strong>eslint-plugin:</strong> [no-duplicate-type-constituents] shouldn't report on error types</p>
</li>
<li>
<p><strong>eslint-plugin:</strong> [strict-boolean-expressions] support branded booleans</p>
</li>
</ul>
<h3>❤️  Thank You</h3>
<ul>
<li>auvred</li>
<li>Oliver Salzburg</li>
<li>Vinccool96</li>
<li>Yukihiro Hasegawa</li>
</ul>
<p>You can read about our <a href="https://main--typescript-eslint.netlify.app/users/versioning">versioning strategy</a> and <a href="https://main--typescript-eslint.netlify.app/users/releases">releases</a> on our website.</p>
<h2>7.17.0 (2024-07-22)</h2>
<h3>🚀 Features</h3>
<ul>
<li>
<p><strong>eslint-plugin:</strong> backport no-unsafe-function type, no-wrapper-object-types from v8 to v7</p>
</li>
<li>
<p><strong>eslint-plugin:</strong> [return-await] add option to report in error-handling scenarios only, and deprecate &quot;never&quot;</p>
</li>
</ul>
<h3>🩹 Fixes</h3>
<ul>
<li>
<p><strong>eslint-plugin:</strong> [no-floating-promises] check top-level type assertions (and more)</p>
</li>
<li>
<p><strong>eslint-plugin:</strong> [strict-boolean-expressions] consider assertion function argument a boolean context</p>
</li>
<li>
<p><strong>eslint-plugin:</strong> [no-unnecessary-condition] false positive on optional private field</p>
</li>
</ul>
<h3>❤️  Thank You</h3>
<ul>
<li>Armano</li>
<li>Josh Goldberg ✨</li>
<li>Kirk Waiblinger</li>
<li>StyleShit</li>
</ul>
<p>You can read about our <a href="https://main--typescript-eslint.netlify.app/users/versioning">versioning strategy</a> and <a href="https://main--typescript-eslint.netlify.app/users/releases">releases</a> on our website.</p>

</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/35cf3d2b2b9611c3812b120c461d863c7881ac04"><code>35cf3d2</code></a> chore(release): publish 7.18.0</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/46a5709e434a0a252a4ffd5bfe32bf883adbb418"><code>46a5709</code></a> docs: link no-duplicate-type-constituents and no-redundant-type-constituents ...</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/9eec7903698a98f61ddb933b7209d126e3400bb1"><code>9eec790</code></a> fix(eslint-plugin): [strict-boolean-expressions] support branded booleans (<a href="https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin/issues/9">#9</a>...</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/9927a29eb83ce43bb6ecedbd0943207543eadc80"><code>9927a29</code></a> docs: add ast-spec, type-utils docs with docusaurus-plugin-typedoc (<a href="https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin/issues/9293">#9293</a>)</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/fb0ca4cbe79cd4b27300a42b31d6a7f5ea13e8e8"><code>fb0ca4c</code></a> docs: remove unnecessary v8 links (<a href="https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin/issues/9611">#9611</a>)</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/3591b78dc70592684a263755430477e74c7a5133"><code>3591b78</code></a> fix(eslint-plugin): [no-duplicate-type-constituents] shouldn't report on erro...</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/64b4e43112eb52de79c6ad6454d0b243cfc1fc21"><code>64b4e43</code></a> fix(eslint-plugin): [unbound-method] report on destructuring in function para...</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/bf4abdf3ce9454c8a291e78f32994c721fb5fe82"><code>bf4abdf</code></a> fix(eslint-plugin): [no-unnecessary-type-assertion] prevent runtime error whe...</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/6b92aa5ce61d86869493b764f77d882bb4d14ce7"><code>6b92aa5</code></a> chore: reorg repo level utils, lint and typecheck repo files (<a href="https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin/issues/9618">#9618</a>)</li>
<li><a href="https://github.com/typescript-eslint/typescript-eslint/commit/1e32db13dbf3c73423254f425662ed874f0b62b6"><code>1e32db1</code></a> chore: enable radix (<a href="https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin/issues/9563">#9563</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/typescript-eslint/typescript-eslint/commits/v7.18.0/packages/eslint-plugin">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ typescript-eslint/eslint-plugin&package-manager=npm_and_yarn&previous-version=7.12.0&new-version=7.18.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/package.json |   2 +-
 js/yarn.lock    | 108 ++++++++++++++++++++++++------------------------
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/js/package.json b/js/package.json
index 1688747d24290..d8a784b784d3c 100644
--- a/js/package.json
+++ b/js/package.json
@@ -72,7 +72,7 @@
     "@types/glob": "8.1.0",
     "@types/jest": "29.5.12",
     "@types/multistream": "4.1.3",
-    "@typescript-eslint/eslint-plugin": "7.12.0",
+    "@typescript-eslint/eslint-plugin": "7.18.0",
     "@typescript-eslint/parser": "7.14.1",
     "async-done": "2.0.0",
     "benny": "3.7.1",
diff --git a/js/yarn.lock b/js/yarn.lock
index d1a089501a388..e8223fba9aad2 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -1421,16 +1421,16 @@
   dependencies:
     "@types/yargs-parser" "*"
 
-"@typescript-eslint/eslint-plugin@7.12.0":
-  version "7.12.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.12.0.tgz#f87a32e8972b8a60024f2f8f12205e7c8108bc41"
-  integrity sha512-7F91fcbuDf/d3S8o21+r3ZncGIke/+eWk0EpO21LXhDfLahriZF9CGj4fbAetEjlaBdjdSm9a6VeXbpbT6Z40Q==
+"@typescript-eslint/eslint-plugin@7.18.0":
+  version "7.18.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.18.0.tgz#b16d3cf3ee76bf572fdf511e79c248bdec619ea3"
+  integrity sha512-94EQTWZ40mzBc42ATNIBimBEDltSJ9RQHCC8vc/PDbxi4k8dVwUAv4o98dk50M1zB+JGFxp43FP7f8+FP8R6Sw==
   dependencies:
     "@eslint-community/regexpp" "^4.10.0"
-    "@typescript-eslint/scope-manager" "7.12.0"
-    "@typescript-eslint/type-utils" "7.12.0"
-    "@typescript-eslint/utils" "7.12.0"
-    "@typescript-eslint/visitor-keys" "7.12.0"
+    "@typescript-eslint/scope-manager" "7.18.0"
+    "@typescript-eslint/type-utils" "7.18.0"
+    "@typescript-eslint/utils" "7.18.0"
+    "@typescript-eslint/visitor-keys" "7.18.0"
     graphemer "^1.4.0"
     ignore "^5.3.1"
     natural-compare "^1.4.0"
@@ -1447,14 +1447,6 @@
     "@typescript-eslint/visitor-keys" "7.14.1"
     debug "^4.3.4"
 
-"@typescript-eslint/scope-manager@7.12.0":
-  version "7.12.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.12.0.tgz#259c014362de72dd34f995efe6bd8dda486adf58"
-  integrity sha512-itF1pTnN6F3unPak+kutH9raIkL3lhH1YRPGgt7QQOh43DQKVJXmWkpb+vpc/TiDHs6RSd9CTbDsc/Y+Ygq7kg==
-  dependencies:
-    "@typescript-eslint/types" "7.12.0"
-    "@typescript-eslint/visitor-keys" "7.12.0"
-
 "@typescript-eslint/scope-manager@7.14.1":
   version "7.14.1"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.14.1.tgz#63de7a577bc6fe8ee6e412a5b85499f654b93ee5"
@@ -1463,39 +1455,33 @@
     "@typescript-eslint/types" "7.14.1"
     "@typescript-eslint/visitor-keys" "7.14.1"
 
-"@typescript-eslint/type-utils@7.12.0":
-  version "7.12.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.12.0.tgz#9dfaaa1972952f395ec5be4f5bbfc4d3cdc63908"
-  integrity sha512-lib96tyRtMhLxwauDWUp/uW3FMhLA6D0rJ8T7HmH7x23Gk1Gwwu8UZ94NMXBvOELn6flSPiBrCKlehkiXyaqwA==
+"@typescript-eslint/scope-manager@7.18.0":
+  version "7.18.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.18.0.tgz#c928e7a9fc2c0b3ed92ab3112c614d6bd9951c83"
+  integrity sha512-jjhdIE/FPF2B7Z1uzc6i3oWKbGcHb87Qw7AWj6jmEqNOfDFbJWtjt/XfwCpvNkpGWlcJaog5vTR+VV8+w9JflA==
+  dependencies:
+    "@typescript-eslint/types" "7.18.0"
+    "@typescript-eslint/visitor-keys" "7.18.0"
+
+"@typescript-eslint/type-utils@7.18.0":
+  version "7.18.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.18.0.tgz#2165ffaee00b1fbbdd2d40aa85232dab6998f53b"
+  integrity sha512-XL0FJXuCLaDuX2sYqZUUSOJ2sG5/i1AAze+axqmLnSkNEVMVYLF+cbwlB2w8D1tinFuSikHmFta+P+HOofrLeA==
   dependencies:
-    "@typescript-eslint/typescript-estree" "7.12.0"
-    "@typescript-eslint/utils" "7.12.0"
+    "@typescript-eslint/typescript-estree" "7.18.0"
+    "@typescript-eslint/utils" "7.18.0"
     debug "^4.3.4"
     ts-api-utils "^1.3.0"
 
-"@typescript-eslint/types@7.12.0":
-  version "7.12.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.12.0.tgz#bf208f971a8da1e7524a5d9ae2b5f15192a37981"
-  integrity sha512-o+0Te6eWp2ppKY3mLCU+YA9pVJxhUJE15FV7kxuD9jgwIAa+w/ycGJBMrYDTpVGUM/tgpa9SeMOugSabWFq7bg==
-
 "@typescript-eslint/types@7.14.1":
   version "7.14.1"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.14.1.tgz#a43a540dbe5df7f2a11269683d777fc50b4350aa"
   integrity sha512-mL7zNEOQybo5R3AavY+Am7KLv8BorIv7HCYS5rKoNZKQD9tsfGUpO4KdAn3sSUvTiS4PQkr2+K0KJbxj8H9NDg==
 
-"@typescript-eslint/typescript-estree@7.12.0":
-  version "7.12.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.12.0.tgz#e6c1074f248b3db6573ab6a7c47a39c4cd498ff9"
-  integrity sha512-5bwqLsWBULv1h6pn7cMW5dXX/Y2amRqLaKqsASVwbBHMZSnHqE/HN4vT4fE0aFsiwxYvr98kqOWh1a8ZKXalCQ==
-  dependencies:
-    "@typescript-eslint/types" "7.12.0"
-    "@typescript-eslint/visitor-keys" "7.12.0"
-    debug "^4.3.4"
-    globby "^11.1.0"
-    is-glob "^4.0.3"
-    minimatch "^9.0.4"
-    semver "^7.6.0"
-    ts-api-utils "^1.3.0"
+"@typescript-eslint/types@7.18.0":
+  version "7.18.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.18.0.tgz#b90a57ccdea71797ffffa0321e744f379ec838c9"
+  integrity sha512-iZqi+Ds1y4EDYUtlOOC+aUmxnE9xS/yCigkjA7XpTKV6nCBd3Hp/PRGGmdwnfkV2ThMyYldP1wRpm/id99spTQ==
 
 "@typescript-eslint/typescript-estree@7.14.1":
   version "7.14.1"
@@ -1511,23 +1497,29 @@
     semver "^7.6.0"
     ts-api-utils "^1.3.0"
 
-"@typescript-eslint/utils@7.12.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0":
-  version "7.12.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.12.0.tgz#c6e58fd7f724cdccc848f71e388ad80cbdb95dd0"
-  integrity sha512-Y6hhwxwDx41HNpjuYswYp6gDbkiZ8Hin9Bf5aJQn1bpTs3afYY4GX+MPYxma8jtoIV2GRwTM/UJm/2uGCVv+DQ==
+"@typescript-eslint/typescript-estree@7.18.0":
+  version "7.18.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.18.0.tgz#b5868d486c51ce8f312309ba79bdb9f331b37931"
+  integrity sha512-aP1v/BSPnnyhMHts8cf1qQ6Q1IFwwRvAQGRvBFkWlo3/lH29OXA3Pts+c10nxRxIBrDnoMqzhgdwVe5f2D6OzA==
   dependencies:
-    "@eslint-community/eslint-utils" "^4.4.0"
-    "@typescript-eslint/scope-manager" "7.12.0"
-    "@typescript-eslint/types" "7.12.0"
-    "@typescript-eslint/typescript-estree" "7.12.0"
+    "@typescript-eslint/types" "7.18.0"
+    "@typescript-eslint/visitor-keys" "7.18.0"
+    debug "^4.3.4"
+    globby "^11.1.0"
+    is-glob "^4.0.3"
+    minimatch "^9.0.4"
+    semver "^7.6.0"
+    ts-api-utils "^1.3.0"
 
-"@typescript-eslint/visitor-keys@7.12.0":
-  version "7.12.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.12.0.tgz#c053b55a996679528beeedd8e565710ce1ae1ad3"
-  integrity sha512-uZk7DevrQLL3vSnfFl5bj4sL75qC9D6EdjemIdbtkuUmIheWpuiiylSY01JxJE7+zGrOWDZrp1WxOuDntvKrHQ==
+"@typescript-eslint/utils@7.18.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0":
+  version "7.18.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.18.0.tgz#bca01cde77f95fc6a8d5b0dbcbfb3d6ca4be451f"
+  integrity sha512-kK0/rNa2j74XuHVcoCZxdFBMF+aq/vH83CXAOHieC+2Gis4mF8jJXT5eAfyD3K0sAxtPuwxaIOIOvhwzVDt/kw==
   dependencies:
-    "@typescript-eslint/types" "7.12.0"
-    eslint-visitor-keys "^3.4.3"
+    "@eslint-community/eslint-utils" "^4.4.0"
+    "@typescript-eslint/scope-manager" "7.18.0"
+    "@typescript-eslint/types" "7.18.0"
+    "@typescript-eslint/typescript-estree" "7.18.0"
 
 "@typescript-eslint/visitor-keys@7.14.1":
   version "7.14.1"
@@ -1537,6 +1529,14 @@
     "@typescript-eslint/types" "7.14.1"
     eslint-visitor-keys "^3.4.3"
 
+"@typescript-eslint/visitor-keys@7.18.0":
+  version "7.18.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.18.0.tgz#0564629b6124d67607378d0f0332a0495b25e7d7"
+  integrity sha512-cDF0/Gf81QpY3xYyJKDV14Zwdmid5+uuENhjH2EqFaF0ni+yAyq/LzMaIJdhNJXZI7uLzwIlA+V7oWoyn6Curg==
+  dependencies:
+    "@typescript-eslint/types" "7.18.0"
+    eslint-visitor-keys "^3.4.3"
+
 "@ungap/structured-clone@^1.2.0":
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406"

From 7f88ae7d5e7f18911c36630ac0ebd17ae78ab686 Mon Sep 17 00:00:00 2001
From: Max Feinleib <82004873+feinleib@users.noreply.github.com>
Date: Mon, 2 Sep 2024 10:03:09 -0400
Subject: [PATCH 46/63] MINOR: [R] Fix monospace formatting in dplyr-funcs-doc
 (#43461)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added a closing backtick for the `.groups` argument in `summarise()`.

### Rationale for this change

Improves the formatting and appearance of the Acero documentation page at <https://arrow.apache.org/docs/r/reference/acero.html>.

### Are these changes tested?

Yes, I ran `devtools::check()` on this change. I got one warning (related to my environment) and one note that isn't going to be fixable. I would consider this result "passing."

```
❯ checking top-level files ... WARNING
  A complete check needs the 'checkbashisms' script.
  See section ‘Configure and cleanup’ in the ‘Writing R Extensions’
  manual.

❯ checking installed package size ... NOTE
    installed size is 54.7Mb
    sub-directories of 1Mb or more:
      R      5.1Mb
      libs  49.0Mb

0 errors ✔ | 1 warning ✖ | 1 note ✖
```

### Are there any user-facing changes?

This is a documentation change.

Authored-by: Max Feinleib <82004873+feinleib@users.noreply.github.com>
Signed-off-by: Nic Crane <thisisnic@gmail.com>
---
 r/R/arrow-package.R   | 2 +-
 r/R/dplyr-funcs-doc.R | 2 +-
 r/man/acero.Rd        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 44dfbbcd5c7e7..4c3b78e085c6e 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -41,7 +41,7 @@ supported_dplyr_methods <- list(
   collect = NULL,
   summarise = c(
     "window functions not currently supported;",
-    'arguments `.drop = FALSE` and `.groups = "rowwise" not supported'
+    'arguments `.drop = FALSE` and `.groups = "rowwise"` not supported'
   ),
   group_by = NULL,
   groups = NULL,
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 7f0627c33d010..4f90dd16b266f 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -67,7 +67,7 @@
 #' * [`slice_min()`][dplyr::slice_min()]: slicing within groups not supported; `with_ties = TRUE` (dplyr default) is not supported; `prop` only supported on queries where `nrow()` is knowable without evaluating
 #' * [`slice_sample()`][dplyr::slice_sample()]: slicing within groups not supported; `replace = TRUE` and the `weight_by` argument not supported; `n` only supported on queries where `nrow()` is knowable without evaluating
 #' * [`slice_tail()`][dplyr::slice_tail()]: slicing within groups not supported; Arrow datasets do not have row order, so tail is non-deterministic; `prop` only supported on queries where `nrow()` is knowable without evaluating
-#' * [`summarise()`][dplyr::summarise()]: window functions not currently supported; arguments `.drop = FALSE` and `.groups = "rowwise" not supported
+#' * [`summarise()`][dplyr::summarise()]: window functions not currently supported; arguments `.drop = FALSE` and `.groups = "rowwise"` not supported
 #' * [`tally()`][dplyr::tally()]
 #' * [`transmute()`][dplyr::transmute()]
 #' * [`ungroup()`][dplyr::ungroup()]
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index 9ef9cd7dda6fb..aceb533a151f6 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -54,7 +54,7 @@ Table into an R \code{tibble}.
 \item \code{\link[dplyr:slice]{slice_min()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
 \item \code{\link[dplyr:slice]{slice_sample()}}: slicing within groups not supported; \code{replace = TRUE} and the \code{weight_by} argument not supported; \code{n} only supported on queries where \code{nrow()} is knowable without evaluating
 \item \code{\link[dplyr:slice]{slice_tail()}}: slicing within groups not supported; Arrow datasets do not have row order, so tail is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
-\item \code{\link[dplyr:summarise]{summarise()}}: window functions not currently supported; arguments \code{.drop = FALSE} and `.groups = "rowwise" not supported
+\item \code{\link[dplyr:summarise]{summarise()}}: window functions not currently supported; arguments \code{.drop = FALSE} and \code{.groups = "rowwise"} not supported
 \item \code{\link[dplyr:count]{tally()}}
 \item \code{\link[dplyr:transmute]{transmute()}}
 \item \code{\link[dplyr:group_by]{ungroup()}}

From a8df190a43b0ddbb2009cd55b54f4cbb4d9c3377 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Mon, 2 Sep 2024 10:03:49 -0400
Subject: [PATCH 47/63] GH-43894: [R] format_aggregation() should print options
 too (#43896)

### Rationale for this change

If you printed the inner query after summarize, it would show what function was being called but not the function options.

### What changes are included in this PR?

One-line code change plus a test

### Are these changes tested?

Yes. Interestingly, it did not seem that `format_aggregation()` was tested before.

### Are there any user-facing changes?

Technically yes, but few users would likely see this.
* GitHub Issue: #43894

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Nic Crane <thisisnic@gmail.com>
---
 r/R/dplyr-summarize.R                   |  2 +-
 r/tests/testthat/test-dplyr-summarize.R | 38 +++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index a9ad750de7c42..42fd245e5ab9d 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -241,7 +241,7 @@ group_types <- function(.data, schema = NULL) {
 }
 
 format_aggregation <- function(x) {
-  paste0(x$fun, "(", paste(map(x$data, ~ .$ToString()), collapse = ","), ")")
+  Expression$create(x$fun, args = x$data, options = x$options)$ToString()
 }
 
 # This function evaluates an expression and returns the post-summarize
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 95212407acf9d..8d2a209df547f 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -955,6 +955,44 @@ test_that("Summarize with 0 arguments", {
   )
 })
 
+test_that("Printing aggregation expressions", {
+  q <- tbl |>
+    arrow_table() |>
+    summarize(
+      total = sum(int, na.rm = TRUE),
+      prod = prod(int, na.rm = TRUE),
+      any = any(lgl, na.rm = TRUE),
+      all = all(lgl, na.rm = TRUE),
+      mean = mean(int, na.rm = TRUE),
+      sd = sd(int, na.rm = TRUE),
+      var = var(int, na.rm = TRUE),
+      n_distinct = n_distinct(chr),
+      min = min(int, na.rm = TRUE),
+      max = max(int, na.rm = TRUE)
+    )
+  expect_output(
+    print(q$.data),
+    "Table (query)
+int: int32
+lgl: bool
+chr: string
+
+* Aggregations:
+total: sum(int, {skip_nulls=true, min_count=0})
+prod: product(int, {skip_nulls=true, min_count=0})
+any: any(lgl, {skip_nulls=true, min_count=0})
+all: all(lgl, {skip_nulls=true, min_count=0})
+mean: mean(int, {skip_nulls=true, min_count=0})
+sd: stddev(int, {ddof=1, skip_nulls=true, min_count=0})
+var: variance(int, {ddof=1, skip_nulls=true, min_count=0})
+n_distinct: count_distinct(chr, {mode=ALL})
+min: min(int, {skip_nulls=true, min_count=0})
+max: max(int, {skip_nulls=true, min_count=0})
+See $.data for the source Arrow object",
+    fixed = TRUE
+  )
+})
+
 test_that("Not supported: window functions", {
   compare_dplyr_binding(
     .input %>%

From 9ab9532a208d5632b0f8b5035a207235b5e6b828 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= <raulcumplido@gmail.com>
Date: Mon, 2 Sep 2024 16:35:26 +0200
Subject: [PATCH 48/63] GH-25118: [Python] Make NumPy an optional runtime
 dependency   (#41904)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

Being able to run pyarrow without requiring numpy.

### What changes are included in this PR?

If NumPy is not installed, pyarrow can still be imported and functionality that does not depend on NumPy keeps working.
A new CI job has been created to run some basic tests without numpy.

### Are these changes tested?

Yes via CI.

### Are there any user-facing changes?

Yes, NumPy can be removed from the user's installation and pyarrow functionality that does not require it still works.

* GitHub Issue: #25118

Lead-authored-by: Raúl Cumplido <raulcumplido@gmail.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .github/workflows/python.yml                  |   6 +
 docker-compose.yml                            |  32 ++++
 python/CMakeLists.txt                         |   4 +-
 python/pyarrow/_compute.pyx                   |  16 +-
 python/pyarrow/array.pxi                      |   5 +
 python/pyarrow/builder.pxi                    |  14 +-
 python/pyarrow/conftest.py                    |  13 +-
 python/pyarrow/includes/libarrow_python.pxd   |   2 +-
 python/pyarrow/lib.pyx                        |  12 +-
 python/pyarrow/pandas_compat.py               |  79 +++++----
 python/pyarrow/src/arrow/python/inference.cc  |   4 +-
 python/pyarrow/src/arrow/python/iterators.h   |   6 +-
 .../arrow/python/{init.cc => numpy_init.cc}   |  13 +-
 .../src/arrow/python/{init.h => numpy_init.h} |   5 +-
 .../pyarrow/src/arrow/python/numpy_internal.h |  19 ++-
 .../pyarrow/src/arrow/python/python_test.cc   |   2 +-
 .../src/arrow/python/python_to_arrow.cc       |  11 +-
 python/pyarrow/table.pxi                      |   3 +
 python/pyarrow/tensor.pxi                     |  15 ++
 python/pyarrow/tests/conftest.py              |   1 +
 .../tests/interchange/test_conversion.py      |  35 ++--
 .../interchange/test_interchange_spec.py      |  33 ++--
 python/pyarrow/tests/parquet/common.py        |   5 +-
 python/pyarrow/tests/parquet/test_basic.py    |   5 +-
 .../pyarrow/tests/parquet/test_data_types.py  |  13 +-
 python/pyarrow/tests/parquet/test_dataset.py  |   5 +-
 python/pyarrow/tests/parquet/test_datetime.py |   5 +-
 python/pyarrow/tests/parquet/test_metadata.py |   7 +-
 python/pyarrow/tests/parquet/test_pandas.py   |   5 +-
 python/pyarrow/tests/strategies.py            |  10 +-
 .../pyarrow/tests/test_adhoc_memory_leak.py   |   5 +-
 python/pyarrow/tests/test_array.py            | 100 +++++++++--
 python/pyarrow/tests/test_builder.py          |  11 +-
 python/pyarrow/tests/test_compute.py          |  85 ++++++----
 python/pyarrow/tests/test_convert_builtin.py  | 155 +++++++++++-------
 python/pyarrow/tests/test_cpp_internals.py    |   8 +
 python/pyarrow/tests/test_csv.py              |  44 ++++-
 python/pyarrow/tests/test_cuda.py             |   5 +-
 .../pyarrow/tests/test_cuda_numba_interop.py  |   5 +-
 python/pyarrow/tests/test_cython.py           |   4 +
 python/pyarrow/tests/test_dataset.py          |  55 ++++---
 .../pyarrow/tests/test_dataset_encryption.py  |   7 +-
 python/pyarrow/tests/test_dlpack.py           |  46 +++---
 python/pyarrow/tests/test_extension_type.py   |  77 ++++++---
 python/pyarrow/tests/test_feather.py          |  10 +-
 python/pyarrow/tests/test_flight.py           |   6 +-
 python/pyarrow/tests/test_io.py               |  38 +++--
 python/pyarrow/tests/test_ipc.py              |  10 +-
 python/pyarrow/tests/test_json.py             |   8 +-
 python/pyarrow/tests/test_pandas.py           |  62 +++----
 python/pyarrow/tests/test_scalars.py          |  59 +++++--
 python/pyarrow/tests/test_schema.py           |   6 +-
 python/pyarrow/tests/test_sparse_tensor.py    |   5 +-
 python/pyarrow/tests/test_strategies.py       |   5 +
 python/pyarrow/tests/test_substrait.py        |   2 +
 python/pyarrow/tests/test_table.py            |  29 +++-
 python/pyarrow/tests/test_tensor.py           |   5 +-
 python/pyarrow/tests/test_types.py            |  16 +-
 python/pyarrow/tests/test_udf.py              |  13 +-
 python/pyarrow/tests/test_without_numpy.py    |  58 +++++++
 python/pyarrow/tests/util.py                  |  19 +--
 python/pyarrow/types.pxi                      |  85 +++++-----
 62 files changed, 1008 insertions(+), 420 deletions(-)
 rename python/pyarrow/src/arrow/python/{init.cc => numpy_init.cc} (78%)
 rename python/pyarrow/src/arrow/python/{init.h => numpy_init.h} (93%)
 create mode 100644 python/pyarrow/tests/test_without_numpy.py

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 854d792f3100d..90d3a50af3705 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -59,6 +59,7 @@ jobs:
           - conda-python-3.9-nopandas
           - conda-python-3.8-pandas-1.0
           - conda-python-3.10-pandas-latest
+          - conda-python-3.10-no-numpy
         include:
           - name: conda-python-docs
             cache: conda-python-3.9
@@ -83,6 +84,11 @@ jobs:
             title: AMD64 Conda Python 3.10 Pandas latest
             python: "3.10"
             pandas: latest
+          - name: conda-python-3.10-no-numpy
+            cache: conda-python-3.10
+            image: conda-python-no-numpy
+            title: AMD64 Conda Python 3.10 without NumPy
+            python: "3.10"
     env:
       PYTHON: ${{ matrix.python || 3.8 }}
       UBUNTU: ${{ matrix.ubuntu || 20.04 }}
diff --git a/docker-compose.yml b/docker-compose.yml
index 3045cf015bc26..97d6e1158ea03 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -126,6 +126,7 @@ x-hierarchy:
         - conda-python-hdfs
         - conda-python-java-integration
         - conda-python-jpype
+        - conda-python-no-numpy
         - conda-python-spark
         - conda-python-substrait
   - conda-verify-rc
@@ -1258,6 +1259,37 @@ services:
     volumes: *conda-volumes
     command: *python-conda-command
 
+  conda-python-no-numpy:
+    # Usage:
+    #   docker-compose build conda
+    #   docker-compose build conda-cpp
+    #   docker-compose build conda-python
+    #   docker-compose build conda-python-no-numpy
+    #   docker-compose run --rm conda-python-no-numpy
+    image: ${REPO}:${ARCH}-conda-python-${PYTHON}-no-numpy
+    build:
+      context: .
+      dockerfile: ci/docker/conda-python.dockerfile
+      cache_from:
+        - ${REPO}:${ARCH}-conda-python-${PYTHON}
+      args:
+        repo: ${REPO}
+        arch: ${ARCH}
+        python: ${PYTHON}
+    shm_size: *shm-size
+    environment:
+      <<: [*common, *ccache, *sccache]
+      PARQUET_REQUIRE_ENCRYPTION:  # inherit
+      HYPOTHESIS_PROFILE:  # inherit
+      PYARROW_TEST_HYPOTHESIS:  # inherit
+    volumes: *conda-volumes
+    command:
+      ["
+        /arrow/ci/scripts/cpp_build.sh /arrow /build &&
+        /arrow/ci/scripts/python_build.sh /arrow /build &&
+        mamba uninstall -y numpy &&
+        /arrow/ci/scripts/python_test.sh /arrow"]
+
   conda-python-docs:
     # Usage:
     #   archery docker run conda-python-docs
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 1a18b2b173acb..eda4ff4ca5f07 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -339,17 +339,17 @@ set(PYARROW_CPP_SRCS
     ${PYARROW_CPP_SOURCE_DIR}/gdb.cc
     ${PYARROW_CPP_SOURCE_DIR}/helpers.cc
     ${PYARROW_CPP_SOURCE_DIR}/inference.cc
-    ${PYARROW_CPP_SOURCE_DIR}/init.cc
     ${PYARROW_CPP_SOURCE_DIR}/io.cc
     ${PYARROW_CPP_SOURCE_DIR}/ipc.cc
     ${PYARROW_CPP_SOURCE_DIR}/numpy_convert.cc
+    ${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
     ${PYARROW_CPP_SOURCE_DIR}/numpy_to_arrow.cc
     ${PYARROW_CPP_SOURCE_DIR}/python_test.cc
     ${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
     ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
     ${PYARROW_CPP_SOURCE_DIR}/serialize.cc
     ${PYARROW_CPP_SOURCE_DIR}/udf.cc)
-set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/init.cc
+set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
                             PROPERTIES SKIP_PRECOMPILE_HEADERS ON
                                        SKIP_UNITY_BUILD_INCLUSION ON)
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 0e860eaf4c6b8..d39120934d5fd 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -33,7 +33,10 @@ from pyarrow.util import _DEPR_MSG
 from libcpp cimport bool as c_bool
 
 import inspect
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import warnings
 
 
@@ -43,6 +46,11 @@ _substrait_msg = (
 )
 
 
+SUPPORTED_INPUT_ARR_TYPES = (list, tuple)
+if np is not None:
+    SUPPORTED_INPUT_ARR_TYPES += (np.ndarray, )
+
+
 def _pas():
     global __pas
     if __pas is None:
@@ -473,7 +481,7 @@ cdef class MetaFunction(Function):
 
 cdef _pack_compute_args(object values, vector[CDatum]* out):
     for val in values:
-        if isinstance(val, (list, np.ndarray)):
+        if isinstance(val, SUPPORTED_INPUT_ARR_TYPES):
             val = lib.asarray(val)
 
         if isinstance(val, Array):
@@ -2189,7 +2197,7 @@ class QuantileOptions(_QuantileOptions):
 
     def __init__(self, q=0.5, *, interpolation="linear", skip_nulls=True,
                  min_count=0):
-        if not isinstance(q, (list, tuple, np.ndarray)):
+        if not isinstance(q, SUPPORTED_INPUT_ARR_TYPES):
             q = [q]
         self._set_options(q, interpolation, skip_nulls, min_count)
 
@@ -2222,7 +2230,7 @@ class TDigestOptions(_TDigestOptions):
 
     def __init__(self, q=0.5, *, delta=100, buffer_size=500, skip_nulls=True,
                  min_count=0):
-        if not isinstance(q, (list, tuple, np.ndarray)):
+        if not isinstance(q, SUPPORTED_INPUT_ARR_TYPES):
             q = [q]
         self._set_options(q, delta, buffer_size, skip_nulls, min_count)
 
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 1587de0e6b744..93c44297590e8 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -50,6 +50,8 @@ cdef _sequence_to_array(object sequence, object mask, object size,
 
 
 cdef inline _is_array_like(obj):
+    if np is None:
+        return False
     if isinstance(obj, np.ndarray):
         return True
     return pandas_api._have_pandas_internal() and pandas_api.is_array_like(obj)
@@ -1608,6 +1610,9 @@ cdef class Array(_PandasConvertible):
         """
         self._assert_cpu()
 
+        if np is None:
+            raise ImportError(
+                "Cannot return a numpy.ndarray if NumPy is not present")
         cdef:
             PyObject* out
             PandasOptions c_options
diff --git a/python/pyarrow/builder.pxi b/python/pyarrow/builder.pxi
index 2af39e2c589e6..fbab5bbdb5a01 100644
--- a/python/pyarrow/builder.pxi
+++ b/python/pyarrow/builder.pxi
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import math
+
 
 cdef class StringBuilder(_Weakrefable):
     """
@@ -42,10 +44,10 @@ cdef class StringBuilder(_Weakrefable):
         value : string/bytes or np.nan/None
             The value to append to the string array builder.
         """
-        if value is None or value is np.nan:
-            self.builder.get().AppendNull()
-        elif isinstance(value, (bytes, str)):
+        if isinstance(value, (bytes, str)):
             self.builder.get().Append(tobytes(value))
+        elif value is None or math.isnan(value):
+            self.builder.get().AppendNull()
         else:
             raise TypeError('StringBuilder only accepts string objects')
 
@@ -108,10 +110,10 @@ cdef class StringViewBuilder(_Weakrefable):
         value : string/bytes or np.nan/None
             The value to append to the string array builder.
         """
-        if value is None or value is np.nan:
-            self.builder.get().AppendNull()
-        elif isinstance(value, (bytes, str)):
+        if isinstance(value, (bytes, str)):
             self.builder.get().Append(tobytes(value))
+        elif value is None or math.isnan(value):
+            self.builder.get().AppendNull()
         else:
             raise TypeError('StringViewBuilder only accepts string objects')
 
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index 29c850c142da1..10a2e72f923cb 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -25,7 +25,6 @@
 from pyarrow.tests.util import windows_has_tzdata
 import sys
 
-import numpy as np
 
 groups = [
     'acero',
@@ -46,6 +45,8 @@
     'lz4',
     'memory_leak',
     'nopandas',
+    'nonumpy',
+    'numpy',
     'orc',
     'pandas',
     'parquet',
@@ -81,6 +82,8 @@
     'lz4': Codec.is_available('lz4'),
     'memory_leak': False,
     'nopandas': False,
+    'nonumpy': False,
+    'numpy': False,
     'orc': False,
     'pandas': False,
     'parquet': False,
@@ -158,6 +161,12 @@
 except ImportError:
     defaults['nopandas'] = True
 
+try:
+    import numpy  # noqa
+    defaults['numpy'] = True
+except ImportError:
+    defaults['nonumpy'] = True
+
 try:
     import pyarrow.parquet  # noqa
     defaults['parquet'] = True
@@ -327,6 +336,7 @@ def unary_agg_func_fixture():
     Register a unary aggregate function (mean)
     """
     from pyarrow import compute as pc
+    import numpy as np
 
     def func(ctx, x):
         return pa.scalar(np.nanmean(x))
@@ -352,6 +362,7 @@ def varargs_agg_func_fixture():
     Register a unary aggregate function
     """
     from pyarrow import compute as pc
+    import numpy as np
 
     def func(ctx, *args):
         sum = 0.0
diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd
index 9fcc97aaf0a9c..96725c9c3862b 100644
--- a/python/pyarrow/includes/libarrow_python.pxd
+++ b/python/pyarrow/includes/libarrow_python.pxd
@@ -248,7 +248,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
     CResult[PyObject*] StringToTzinfo(c_string)
 
 
-cdef extern from "arrow/python/init.h":
+cdef extern from "arrow/python/numpy_init.h" namespace "arrow::py":
     int arrow_init_numpy() except -1
 
 
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index c72841c299566..6b82eb6566896 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -21,7 +21,10 @@
 
 import datetime
 import decimal as _pydecimal
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import os
 import sys
 
@@ -32,8 +35,11 @@ from pyarrow.includes.common cimport PyObject_to_object
 cimport pyarrow.includes.libarrow_python as libarrow_python
 cimport cpython as cp
 
-# Initialize NumPy C API
-arrow_init_numpy()
+
+# Initialize NumPy C API only if numpy was able to be imported
+if np is not None:
+    arrow_init_numpy()
+
 # Initialize PyArrow C++ API
 # (used from some of our C++ code, see e.g. ARROW-5260)
 import_pyarrow()
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index fcccf564fc619..7fbde36bc23e9 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -30,13 +30,17 @@
 import re
 import warnings
 
-import numpy as np
-
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow as pa
 from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled  # noqa
 
 
 _logical_type_map = {}
+_numpy_logical_type_map = {}
+_pandas_logical_type_map = {}
 
 
 def get_logical_type_map():
@@ -85,27 +89,32 @@ def get_logical_type(arrow_type):
         return 'object'
 
 
-_numpy_logical_type_map = {
-    np.bool_: 'bool',
-    np.int8: 'int8',
-    np.int16: 'int16',
-    np.int32: 'int32',
-    np.int64: 'int64',
-    np.uint8: 'uint8',
-    np.uint16: 'uint16',
-    np.uint32: 'uint32',
-    np.uint64: 'uint64',
-    np.float32: 'float32',
-    np.float64: 'float64',
-    'datetime64[D]': 'date',
-    np.str_: 'string',
-    np.bytes_: 'bytes',
-}
+def get_numpy_logical_type_map():
+    global _numpy_logical_type_map
+    if not _numpy_logical_type_map:
+        _numpy_logical_type_map.update({
+            np.bool_: 'bool',
+            np.int8: 'int8',
+            np.int16: 'int16',
+            np.int32: 'int32',
+            np.int64: 'int64',
+            np.uint8: 'uint8',
+            np.uint16: 'uint16',
+            np.uint32: 'uint32',
+            np.uint64: 'uint64',
+            np.float32: 'float32',
+            np.float64: 'float64',
+            'datetime64[D]': 'date',
+            np.str_: 'string',
+            np.bytes_: 'bytes',
+        })
+    return _numpy_logical_type_map
 
 
 def get_logical_type_from_numpy(pandas_collection):
+    numpy_logical_type_map = get_numpy_logical_type_map()
     try:
-        return _numpy_logical_type_map[pandas_collection.dtype.type]
+        return numpy_logical_type_map[pandas_collection.dtype.type]
     except KeyError:
         if hasattr(pandas_collection.dtype, 'tz'):
             return 'datetimetz'
@@ -1023,18 +1032,23 @@ def _is_generated_index_name(name):
     return re.match(pattern, name) is not None
 
 
-_pandas_logical_type_map = {
-    'date': 'datetime64[D]',
-    'datetime': 'datetime64[ns]',
-    'datetimetz': 'datetime64[ns]',
-    'unicode': np.str_,
-    'bytes': np.bytes_,
-    'string': np.str_,
-    'integer': np.int64,
-    'floating': np.float64,
-    'decimal': np.object_,
-    'empty': np.object_,
-}
+def get_pandas_logical_type_map():
+    global _pandas_logical_type_map
+
+    if not _pandas_logical_type_map:
+        _pandas_logical_type_map.update({
+            'date': 'datetime64[D]',
+            'datetime': 'datetime64[ns]',
+            'datetimetz': 'datetime64[ns]',
+            'unicode': np.str_,
+            'bytes': np.bytes_,
+            'string': np.str_,
+            'integer': np.int64,
+            'floating': np.float64,
+            'decimal': np.object_,
+            'empty': np.object_,
+        })
+    return _pandas_logical_type_map
 
 
 def _pandas_type_to_numpy_type(pandas_type):
@@ -1050,8 +1064,9 @@ def _pandas_type_to_numpy_type(pandas_type):
     dtype : np.dtype
         The dtype that corresponds to `pandas_type`.
     """
+    pandas_logical_type_map = get_pandas_logical_type_map()
     try:
-        return _pandas_logical_type_map[pandas_type]
+        return pandas_logical_type_map[pandas_type]
     except KeyError:
         if 'mixed' in pandas_type:
             # catching 'mixed', 'mixed-integer' and 'mixed-integer-float'
diff --git a/python/pyarrow/src/arrow/python/inference.cc b/python/pyarrow/src/arrow/python/inference.cc
index 10116f9afad69..1aa7915ba1e19 100644
--- a/python/pyarrow/src/arrow/python/inference.cc
+++ b/python/pyarrow/src/arrow/python/inference.cc
@@ -395,11 +395,11 @@ class TypeInferrer {
       *keep_going = make_unions_;
     } else if (arrow::py::is_scalar(obj)) {
       RETURN_NOT_OK(VisitArrowScalar(obj, keep_going));
-    } else if (PyArray_CheckAnyScalarExact(obj)) {
+    } else if (has_numpy() && PyArray_CheckAnyScalarExact(obj)) {
       RETURN_NOT_OK(VisitDType(PyArray_DescrFromScalar(obj), keep_going));
     } else if (PySet_Check(obj) || (Py_TYPE(obj) == &PyDictValues_Type)) {
       RETURN_NOT_OK(VisitSet(obj, keep_going));
-    } else if (PyArray_Check(obj)) {
+    } else if (has_numpy() && PyArray_Check(obj)) {
       RETURN_NOT_OK(VisitNdarray(obj, keep_going));
     } else if (PyDict_Check(obj)) {
       RETURN_NOT_OK(VisitDict(obj));
diff --git a/python/pyarrow/src/arrow/python/iterators.h b/python/pyarrow/src/arrow/python/iterators.h
index 7b31962dac5b8..8512276848272 100644
--- a/python/pyarrow/src/arrow/python/iterators.h
+++ b/python/pyarrow/src/arrow/python/iterators.h
@@ -22,6 +22,7 @@
 #include "arrow/array/array_primitive.h"
 
 #include "arrow/python/common.h"
+#include "arrow/python/numpy_init.h"
 #include "arrow/python/numpy_internal.h"
 
 namespace arrow {
@@ -44,7 +45,7 @@ inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&&
   // VisitorFunc may set to false to terminate iteration
   bool keep_going = true;
 
-  if (PyArray_Check(obj)) {
+  if (has_numpy() && PyArray_Check(obj)) {
     PyArrayObject* arr_obj = reinterpret_cast<PyArrayObject*>(obj);
     if (PyArray_NDIM(arr_obj) != 1) {
       return Status::Invalid("Only 1D arrays accepted");
@@ -64,6 +65,7 @@ inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&&
     // This code path is inefficient: callers should implement dedicated
     // logic for non-object arrays.
   }
+
   if (PySequence_Check(obj)) {
     if (PyList_Check(obj) || PyTuple_Check(obj)) {
       // Use fast item access
@@ -101,7 +103,7 @@ inline Status VisitSequence(PyObject* obj, int64_t offset, VisitorFunc&& func) {
 template <class VisitorFunc>
 inline Status VisitSequenceMasked(PyObject* obj, PyObject* mo, int64_t offset,
                                   VisitorFunc&& func) {
-  if (PyArray_Check(mo)) {
+  if (has_numpy() && PyArray_Check(mo)) {
     PyArrayObject* mask = reinterpret_cast<PyArrayObject*>(mo);
     if (PyArray_NDIM(mask) != 1) {
       return Status::Invalid("Mask must be 1D array");
diff --git a/python/pyarrow/src/arrow/python/init.cc b/python/pyarrow/src/arrow/python/numpy_init.cc
similarity index 78%
rename from python/pyarrow/src/arrow/python/init.cc
rename to python/pyarrow/src/arrow/python/numpy_init.cc
index dba293bbe2366..96e2c7b7ccb5c 100644
--- a/python/pyarrow/src/arrow/python/init.cc
+++ b/python/pyarrow/src/arrow/python/numpy_init.cc
@@ -18,7 +18,26 @@
 // Trigger the array import (inversion of NO_IMPORT_ARRAY)
 #define NUMPY_IMPORT_ARRAY
 
-#include "arrow/python/init.h"
+#include "arrow/python/numpy_init.h"
 #include "arrow/python/numpy_interop.h"
 
-int arrow_init_numpy() { return arrow::py::import_numpy(); }
+namespace arrow::py {
+// True once the NumPy C API has been imported successfully; read by
+// has_numpy().
+bool numpy_imported = false;
+
+// Imports the NumPy C API via import_numpy() and returns its result.
+// The flag is raised only after a successful import, so has_numpy() never
+// reports true while the NumPy C API is unusable.
+int arrow_init_numpy() {
+  const int ret = arrow::py::import_numpy();
+  // NOTE(review): assumes import_numpy() returns 0 on success -- confirm.
+  if (ret == 0) {
+    numpy_imported = true;
+  }
+  return ret;
+}
+
+// Returns whether arrow_init_numpy() has completed successfully.
+bool has_numpy() { return numpy_imported; }
+}  // namespace arrow::py
diff --git a/python/pyarrow/src/arrow/python/init.h b/python/pyarrow/src/arrow/python/numpy_init.h
similarity index 93%
rename from python/pyarrow/src/arrow/python/init.h
rename to python/pyarrow/src/arrow/python/numpy_init.h
index 2e6c954862bd9..36c544c1b51fd 100644
--- a/python/pyarrow/src/arrow/python/init.h
+++ b/python/pyarrow/src/arrow/python/numpy_init.h
@@ -20,7 +20,16 @@
 #include "arrow/python/platform.h"
 #include "arrow/python/visibility.h"
 
-extern "C" {
+namespace arrow::py {
+/// \brief Import NumPy through import_numpy() and record that it is available
+/// \return the value returned by import_numpy()
 ARROW_PYTHON_EXPORT
 int arrow_init_numpy();
-}
+
+/// \brief Whether NumPy has been initialized through arrow_init_numpy()
+///
+/// Exported like arrow_init_numpy() because inline helpers in other headers
+/// call it and may be compiled outside of this library.
+ARROW_PYTHON_EXPORT
+bool has_numpy();
+}  // namespace arrow::py
diff --git a/python/pyarrow/src/arrow/python/numpy_internal.h b/python/pyarrow/src/arrow/python/numpy_internal.h
index b9b632f9f9a12..0b4d0be00e42b 100644
--- a/python/pyarrow/src/arrow/python/numpy_internal.h
+++ b/python/pyarrow/src/arrow/python/numpy_internal.h
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include "arrow/python/numpy_init.h"
 #include "arrow/python/numpy_interop.h"
 
 #include "arrow/status.h"
@@ -155,15 +156,18 @@ inline Status VisitNumpyArrayInline(PyArrayObject* arr, VISITOR* visitor) {
 namespace internal {
 
+// Python float, or (only when has_numpy() is true) a NumPy floating scalar.
 inline bool PyFloatScalar_Check(PyObject* obj) {
-  return PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating);
+  return PyFloat_Check(obj) || (has_numpy() && PyArray_IsScalar(obj, Floating));
 }
 
+// Python int, or (only when has_numpy() is true) a NumPy integer scalar.
 inline bool PyIntScalar_Check(PyObject* obj) {
-  return PyLong_Check(obj) || PyArray_IsScalar(obj, Integer);
+  return PyLong_Check(obj) || (has_numpy() && PyArray_IsScalar(obj, Integer));
 }
 
+// Python bool, or (only when has_numpy() is true) a NumPy bool scalar.
 inline bool PyBoolScalar_Check(PyObject* obj) {
-  return PyBool_Check(obj) || PyArray_IsScalar(obj, Bool);
+  return PyBool_Check(obj) || (has_numpy() && PyArray_IsScalar(obj, Bool));
 }
 
 static inline PyArray_Descr* GetSafeNumPyDtype(int type) {
diff --git a/python/pyarrow/src/arrow/python/python_test.cc b/python/pyarrow/src/arrow/python/python_test.cc
index 746bf410911f9..eea6bf9459d1f 100644
--- a/python/pyarrow/src/arrow/python/python_test.cc
+++ b/python/pyarrow/src/arrow/python/python_test.cc
@@ -870,7 +870,7 @@ std::vector<TestCase> GetCppTestCases() {
        TestInferAllLeadingZerosExponentialNotationPositive},
       {"test_infer_all_leading_zeros_exponential_notation_negative",
        TestInferAllLeadingZerosExponentialNotationNegative},
-      {"test_object_block_write_fails", TestObjectBlockWriteFails},
+      {"test_object_block_write_fails_pandas_convert", TestObjectBlockWriteFails},
       {"test_mixed_type_fails", TestMixedTypeFails},
       {"test_from_python_decimal_rescale_not_truncateable",
        TestFromPythonDecimalRescaleNotTruncateable},
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index ce9e15c894ce3..e7195e99072b0 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -202,7 +202,7 @@ class PyValue {
       return true;
     } else if (obj == Py_False) {
       return false;
-    } else if (PyArray_IsScalar(obj, Bool)) {
+    } else if (has_numpy() && PyArray_IsScalar(obj, Bool)) {
       return reinterpret_cast<PyBoolScalarObject*>(obj)->obval == NPY_TRUE;
     } else {
       return internal::InvalidValue(obj, "tried to convert to boolean");
@@ -385,7 +385,7 @@ class PyValue {
         default:
           return Status::UnknownError("Invalid time unit");
       }
-    } else if (PyArray_CheckAnyScalarExact(obj)) {
+    } else if (has_numpy() && PyArray_CheckAnyScalarExact(obj)) {
       // validate that the numpy scalar has np.datetime64 dtype
       ARROW_ASSIGN_OR_RAISE(auto numpy_type, NumPyScalarToArrowDataType(obj));
       if (!numpy_type->Equals(*type)) {
@@ -464,7 +464,7 @@ class PyValue {
         default:
           return Status::UnknownError("Invalid time unit");
       }
-    } else if (PyArray_CheckAnyScalarExact(obj)) {
+    } else if (has_numpy() && PyArray_CheckAnyScalarExact(obj)) {
       // validate that the numpy scalar has np.datetime64 dtype
       ARROW_ASSIGN_OR_RAISE(auto numpy_type, NumPyScalarToArrowDataType(obj));
       if (!numpy_type->Equals(*type)) {
@@ -664,7 +664,7 @@ class PyPrimitiveConverter<
       ARROW_ASSIGN_OR_RAISE(
           auto converted, PyValue::Convert(this->primitive_type_, this->options_, value));
       // Numpy NaT sentinels can be checked after the conversion
-      if (PyArray_CheckAnyScalarExact(value) &&
+      if (has_numpy() && PyArray_CheckAnyScalarExact(value) &&
           PyValue::IsNaT(this->primitive_type_, converted)) {
         this->primitive_builder_->UnsafeAppendNull();
       } else {
@@ -804,8 +804,7 @@ class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {
     if (PyValue::IsNull(this->options_, value)) {
       return this->list_builder_->AppendNull();
     }
-
-    if (PyArray_Check(value)) {
+    if (has_numpy() && PyArray_Check(value)) {
       RETURN_NOT_OK(AppendNdarray(value));
     } else if (PySequence_Check(value)) {
       RETURN_NOT_OK(AppendSequence(value));
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 6d34c71c9df40..fff47373cb991 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -495,6 +495,9 @@ cdef class ChunkedArray(_PandasConvertible):
         >>> n_legs.to_numpy()
         array([  2,   2,   4,   4,   5, 100])
         """
+        if np is None:
+            raise ImportError(
+                "Cannot return a numpy.ndarray if NumPy is not present")
         if zero_copy_only:
             raise ValueError(
                 "zero_copy_only must be False for pyarrow.ChunkedArray.to_numpy"
diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi
index 6fb4fc99d7cbc..3e0c63c18fc98 100644
--- a/python/pyarrow/tensor.pxi
+++ b/python/pyarrow/tensor.pxi
@@ -107,6 +107,9 @@ strides: {0.strides}""".format(self)
         array([[  2,   2,   4],
                [  4,   5, 100]], dtype=int32)
         """
+        if np is None:
+            raise ImportError(
+                "Cannot return a numpy.ndarray if NumPy is not present")
         cdef PyObject* out
 
         check_status(TensorToNdarray(self.sp_tensor, self, &out))
@@ -478,6 +481,9 @@ shape: {0.shape}""".format(self)
         """
         Convert arrow::SparseCOOTensor to numpy.ndarrays with zero copy.
         """
+        if np is None:
+            raise ImportError(
+                "Cannot return a numpy.ndarray if NumPy is not present")
         cdef PyObject* out_data
         cdef PyObject* out_coords
 
@@ -743,6 +749,9 @@ shape: {0.shape}""".format(self)
         """
         Convert arrow::SparseCSRMatrix to numpy.ndarrays with zero copy.
         """
+        if np is None:
+            raise ImportError(
+                "Cannot return a numpy.ndarray if NumPy is not present")
         cdef PyObject* out_data
         cdef PyObject* out_indptr
         cdef PyObject* out_indices
@@ -981,6 +990,9 @@ shape: {0.shape}""".format(self)
         """
         Convert arrow::SparseCSCMatrix to numpy.ndarrays with zero copy
         """
+        if np is None:
+            raise ImportError(
+                "Cannot return a numpy.ndarray if NumPy is not present")
         cdef PyObject* out_data
         cdef PyObject* out_indptr
         cdef PyObject* out_indices
@@ -1216,6 +1228,9 @@ shape: {0.shape}""".format(self)
         """
         Convert arrow::SparseCSFTensor to numpy.ndarrays with zero copy
         """
+        if np is None:
+            raise ImportError(
+                "Cannot return a numpy.ndarray if NumPy is not present")
         cdef PyObject* out_data
         cdef PyObject* out_indptr
         cdef PyObject* out_indices
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 7a222cec8a7c4..0b82696d0a73f 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -25,6 +25,7 @@
 
 import pytest
 import hypothesis as h
+
 from ..conftest import groups, defaults
 
 from pyarrow import set_timezone_db_path
diff --git a/python/pyarrow/tests/interchange/test_conversion.py b/python/pyarrow/tests/interchange/test_conversion.py
index 6d91bad57cef4..50da6693afff1 100644
--- a/python/pyarrow/tests/interchange/test_conversion.py
+++ b/python/pyarrow/tests/interchange/test_conversion.py
@@ -16,11 +16,15 @@
 # under the License.
 
 from datetime import datetime as dt
-import numpy as np
 import pyarrow as pa
 from pyarrow.vendored.version import Version
 import pytest
 
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
 import pyarrow.interchange as pi
 from pyarrow.interchange.column import (
     _PyArrowColumn,
@@ -107,13 +111,13 @@ def test_offset_of_sliced_array():
     "int", [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
 )
 @pytest.mark.parametrize(
-    "float, np_float", [
+    "float, np_float_str", [
         # (pa.float16(), np.float16),   #not supported by pandas
-        (pa.float32(), np.float32),
-        (pa.float64(), np.float64)
+        (pa.float32(), "float32"),
+        (pa.float64(), "float64")
     ]
 )
-def test_pandas_roundtrip(uint, int, float, np_float):
+def test_pandas_roundtrip(uint, int, float, np_float_str):
     if Version(pd.__version__) < Version("1.5.0"):
         pytest.skip("__dataframe__ added to pandas in 1.5.0")
 
@@ -122,7 +126,7 @@ def test_pandas_roundtrip(uint, int, float, np_float):
         {
             "a": pa.array(arr, type=uint),
             "b": pa.array(arr, type=int),
-            "c": pa.array(np.array(arr, dtype=np_float), type=float),
+            "c": pa.array(np.array(arr, dtype=np.dtype(np_float_str)), type=float),
             "d": [True, False, True],
         }
     )
@@ -326,13 +330,13 @@ def test_pandas_roundtrip_datetime(unit):
 
 @pytest.mark.pandas
 @pytest.mark.parametrize(
-    "np_float", [np.float32, np.float64]
+    "np_float_str", ["float32", "float64"]
 )
-def test_pandas_to_pyarrow_with_missing(np_float):
+def test_pandas_to_pyarrow_with_missing(np_float_str):
     if Version(pd.__version__) < Version("1.5.0"):
         pytest.skip("__dataframe__ added to pandas in 1.5.0")
 
-    np_array = np.array([0, np.nan, 2], dtype=np_float)
+    np_array = np.array([0, np.nan, 2], dtype=np.dtype(np_float_str))
     datetime_array = [None, dt(2007, 7, 14), dt(2007, 7, 15)]
     df = pd.DataFrame({
         # float, ColumnNullType.USE_NAN
@@ -364,6 +368,7 @@ def test_pandas_to_pyarrow_float16_with_missing():
         pi.from_dataframe(df)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(
     "uint", [pa.uint8(), pa.uint16(), pa.uint32()]
 )
@@ -371,16 +376,16 @@ def test_pandas_to_pyarrow_float16_with_missing():
     "int", [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
 )
 @pytest.mark.parametrize(
-    "float, np_float", [
-        (pa.float16(), np.float16),
-        (pa.float32(), np.float32),
-        (pa.float64(), np.float64)
+    "float, np_float_str", [
+        (pa.float16(), "float16"),
+        (pa.float32(), "float32"),
+        (pa.float64(), "float64")
     ]
 )
 @pytest.mark.parametrize("unit", ['s', 'ms', 'us', 'ns'])
 @pytest.mark.parametrize("tz", ['America/New_York', '+07:30', '-04:30'])
 @pytest.mark.parametrize("offset, length", [(0, 3), (0, 2), (1, 2), (2, 1)])
-def test_pyarrow_roundtrip(uint, int, float, np_float,
+def test_pyarrow_roundtrip(uint, int, float, np_float_str,
                            unit, tz, offset, length):
 
     from datetime import datetime as dt
@@ -391,7 +396,7 @@ def test_pyarrow_roundtrip(uint, int, float, np_float,
         {
             "a": pa.array(arr, type=uint),
             "b": pa.array(arr, type=int),
-            "c": pa.array(np.array(arr, dtype=np_float),
+            "c": pa.array(np.array(arr, dtype=np.dtype(np_float_str)),
                           type=float, from_pandas=True),
             "d": [True, False, True],
             "e": [True, False, None],
diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py
index 826089652bca6..d060f7842c2fe 100644
--- a/python/pyarrow/tests/interchange/test_interchange_spec.py
+++ b/python/pyarrow/tests/interchange/test_interchange_spec.py
@@ -19,10 +19,13 @@
 import hypothesis as h
 import hypothesis.strategies as st
 
-import numpy as np
+import pytest
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow as pa
 import pyarrow.tests.strategies as past
-import pytest
 
 
 all_types = st.deferred(
@@ -39,6 +42,7 @@
 
 # datetime is tested in test_extra.py
 # dictionary is tested in test_categorical()
+@pytest.mark.numpy
 @h.given(past.arrays(all_types, size=3))
 def test_dtypes(arr):
     table = pa.table([arr], names=["a"])
@@ -51,6 +55,7 @@ def test_dtypes(arr):
     assert df.get_column(0).offset == 0
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(
     "uint, uint_bw",
     [
@@ -68,17 +73,17 @@ def test_dtypes(arr):
     ]
 )
 @pytest.mark.parametrize(
-    "float, float_bw, np_float", [
-        (pa.float16(), 16, np.float16),
-        (pa.float32(), 32, np.float32),
-        (pa.float64(), 64, np.float64)
+    "float, float_bw, np_float_str", [
+        (pa.float16(), 16, "float16"),
+        (pa.float32(), 32, "float32"),
+        (pa.float64(), 64, "float64")
     ]
 )
 @pytest.mark.parametrize("unit", ['s', 'ms', 'us', 'ns'])
 @pytest.mark.parametrize("tz", ['', 'America/New_York', '+07:30', '-04:30'])
 @pytest.mark.parametrize("use_batch", [False, True])
 def test_mixed_dtypes(uint, uint_bw, int, int_bw,
-                      float, float_bw, np_float, unit, tz,
+                      float, float_bw, np_float_str, unit, tz,
                       use_batch):
     from datetime import datetime as dt
     arr = [1, 2, 3]
@@ -87,7 +92,7 @@ def test_mixed_dtypes(uint, uint_bw, int, int_bw,
         {
             "a": pa.array(arr, type=uint),
             "b": pa.array(arr, type=int),
-            "c": pa.array(np.array(arr, dtype=np_float), type=float),
+            "c": pa.array(np.array(arr, dtype=np.dtype(np_float_str)), type=float),
             "d": [True, False, True],
             "e": ["a", "", "c"],
             "f": pa.array(dt_arr, type=pa.timestamp(unit, tz=tz))
@@ -200,16 +205,16 @@ def test_column_get_chunks(use_batch, size, n_chunks):
     "int", [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
 )
 @pytest.mark.parametrize(
-    "float, np_float", [
-        (pa.float16(), np.float16),
-        (pa.float32(), np.float32),
-        (pa.float64(), np.float64)
+    "float, np_float_str", [
+        (pa.float16(), "float16"),
+        (pa.float32(), "float32"),
+        (pa.float64(), "float64")
     ]
 )
 @pytest.mark.parametrize("use_batch", [False, True])
-def test_get_columns(uint, int, float, np_float, use_batch):
+def test_get_columns(uint, int, float, np_float_str, use_batch):
     arr = [[1, 2, 3], [4, 5]]
-    arr_float = np.array([1, 2, 3, 4, 5], dtype=np_float)
+    arr_float = np.array([1, 2, 3, 4, 5], dtype=np.dtype(np_float_str))
     table = pa.table(
         {
             "a": pa.chunked_array(arr, type=uint),
diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py
index b4a57ba0b1556..fd6ad94fbd6d3 100644
--- a/python/pyarrow/tests/parquet/common.py
+++ b/python/pyarrow/tests/parquet/common.py
@@ -17,7 +17,10 @@
 
 import io
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 from pyarrow.tests import util
diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index 194af7415e863..6496aa99092b8 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -22,7 +22,6 @@
 from shutil import copytree
 from decimal import Decimal
 
-import numpy as np
 import pytest
 
 import pyarrow as pa
@@ -47,6 +46,10 @@
 except ImportError:
     pd = tm = None
 
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 # Marks all of the tests in this module
 # Ignore these with pytest ... -m 'not parquet'
diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py
index e6b66b00428fb..79dd96948261c 100644
--- a/python/pyarrow/tests/parquet/test_data_types.py
+++ b/python/pyarrow/tests/parquet/test_data_types.py
@@ -17,8 +17,12 @@
 
 import decimal
 import io
+import random
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
@@ -173,6 +177,7 @@ def test_direct_read_dictionary_subfield():
     assert result[0].num_chunks == 1
 
 
+@pytest.mark.numpy
 def test_dictionary_array_automatically_read():
     # ARROW-3246
 
@@ -334,10 +339,10 @@ def test_column_of_lists(tempdir):
 def test_large_list_records():
     # This was fixed in PARQUET-1100
 
-    list_lengths = np.random.randint(0, 500, size=50)
-    list_lengths[::10] = 0
+    list_lengths = [random.randint(0, 500) for _ in range(50)]
+    list_lengths[::10] = [0, 0, 0, 0, 0]
 
-    list_values = [list(map(int, np.random.randint(0, 100, size=x)))
+    list_values = [list(map(int, [random.randint(0, 100) for _ in range(x)]))
                    if i % 8 else None
                    for i, x in enumerate(list_lengths)]
 
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 47e608a1404ff..f68f1aa9cdb46 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -20,7 +20,10 @@
 import os
 import pathlib
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 import unittest.mock as mock
 
diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py
index 08fb1098322be..b89fd97cb91e6 100644
--- a/python/pyarrow/tests/parquet/test_datetime.py
+++ b/python/pyarrow/tests/parquet/test_datetime.py
@@ -19,7 +19,10 @@
 import io
 import warnings
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py
index c29213ebc3d42..14ce9bbfcdd58 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -20,7 +20,10 @@
 from collections import OrderedDict
 import io
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
@@ -584,7 +587,7 @@ def test_table_large_metadata():
     my_schema = pa.schema([pa.field('f0', 'double')],
                           metadata={'large': 'x' * 10000000})
 
-    table = pa.table([np.arange(10)], schema=my_schema)
+    table = pa.table([range(10)], schema=my_schema)
     _check_roundtrip(table)
 
 
diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py
index b5913bf5c6b6e..2ea2f46873aef 100644
--- a/python/pyarrow/tests/parquet/test_pandas.py
+++ b/python/pyarrow/tests/parquet/test_pandas.py
@@ -18,7 +18,10 @@
 import io
 import json
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py
index db0aa1397123d..7a1b31a4d9d77 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -21,7 +21,10 @@
 import pytest
 import hypothesis as h
 import hypothesis.strategies as st
-import hypothesis.extra.numpy as npst
+try:
+    import hypothesis.extra.numpy as npst
+except ImportError:
+    npst = None
 try:
     import hypothesis.extra.pytz as tzst
 except ImportError:
@@ -35,7 +38,10 @@
         import tzdata  # noqa:F401
     except ImportError:
         zoneinfo = None
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 
diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py
index cd381cf427dc3..76a766984dab6 100644
--- a/python/pyarrow/tests/test_adhoc_memory_leak.py
+++ b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -17,7 +17,10 @@
 
 import pytest
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow as pa
 
 import pyarrow.tests.util as test_util
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index c44ec3f8e1afe..4160d64829483 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -27,7 +27,10 @@
 import sys
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 import pyarrow.tests.strategies as past
@@ -157,6 +160,7 @@ def test_binary_total_values_length():
     assert large_arr.slice(1, 3).total_values_length == 11
 
 
+@pytest.mark.numpy
 def test_to_numpy_zero_copy():
     arr = pa.array(range(10))
 
@@ -176,6 +180,7 @@ def test_to_numpy_zero_copy():
     np.testing.assert_array_equal(np_arr, expected)
 
 
+@pytest.mark.numpy
 def test_chunked_array_to_numpy_zero_copy():
     elements = [[2, 2, 4], [4, 5, 100]]
 
@@ -191,6 +196,7 @@ def test_chunked_array_to_numpy_zero_copy():
     np.testing.assert_array_equal(np_arr, expected)
 
 
+@pytest.mark.numpy
 def test_to_numpy_unsupported_types():
     # ARROW-2871: Some primitive types are not yet supported in to_numpy
     bool_arr = pa.array([True, False, True])
@@ -217,6 +223,7 @@ def test_to_numpy_unsupported_types():
         arr.to_numpy()
 
 
+@pytest.mark.numpy
 def test_to_numpy_writable():
     arr = pa.array(range(10))
     np_arr = arr.to_numpy()
@@ -234,6 +241,7 @@ def test_to_numpy_writable():
         arr.to_numpy(zero_copy_only=True, writable=True)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('unit', ['s', 'ms', 'us', 'ns'])
 @pytest.mark.parametrize('tz', [None, "UTC"])
 def test_to_numpy_datetime64(unit, tz):
@@ -243,6 +251,7 @@ def test_to_numpy_datetime64(unit, tz):
     np.testing.assert_array_equal(np_arr, expected)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('unit', ['s', 'ms', 'us', 'ns'])
 def test_to_numpy_timedelta64(unit):
     arr = pa.array([1, 2, 3], pa.duration(unit))
@@ -251,6 +260,7 @@ def test_to_numpy_timedelta64(unit):
     np.testing.assert_array_equal(np_arr, expected)
 
 
+@pytest.mark.numpy
 def test_to_numpy_dictionary():
     # ARROW-7591
     arr = pa.array(["a", "b", "a"]).dictionary_encode()
@@ -427,6 +437,11 @@ def test_array_getitem():
         with pytest.raises(IndexError):
             arr[idx]
 
+
+@pytest.mark.numpy
+def test_array_getitem_numpy_scalars():
+    arr = pa.array(range(10, 15))
+    lst = arr.to_pylist()
     # check that numpy scalars are supported
     for idx in range(-len(arr), len(arr)):
         assert arr[np.int32(idx)].as_py() == lst[idx]
@@ -469,9 +484,11 @@ def test_array_slice():
             res.validate()
             expected = arr.to_pylist()[start:stop]
             assert res.to_pylist() == expected
-            assert res.to_numpy().tolist() == expected
+            if np is not None:
+                assert res.to_numpy().tolist() == expected
 
 
+@pytest.mark.numpy
 def test_array_slice_negative_step():
     # ARROW-2714
     np_arr = np.arange(20)
@@ -542,6 +559,7 @@ def test_struct_array_slice():
                                    {'a': 5, 'b': 6.5}]
 
 
+@pytest.mark.numpy
 def test_array_factory_invalid_type():
 
     class MyObject:
@@ -552,6 +570,7 @@ class MyObject:
         pa.array(arr)
 
 
+@pytest.mark.numpy
 def test_array_ref_to_ndarray_base():
     arr = np.array([1, 2, 3])
 
@@ -576,6 +595,7 @@ def test_array_eq():
     assert (arr1 == None) is False  # noqa: E711
 
 
+@pytest.mark.numpy
 def test_array_from_buffers():
     values_buf = pa.py_buffer(np.int16([4, 5, 6, 7]))
     nulls_buf = pa.py_buffer(np.uint8([0b00001101]))
@@ -773,6 +793,7 @@ def test_dictionary_from_buffers(offset):
     assert a[offset:] == b
 
 
+@pytest.mark.numpy
 def test_dictionary_from_numpy():
     indices = np.repeat([0, 1, 2], 2)
     dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)
@@ -795,6 +816,7 @@ def test_dictionary_from_numpy():
             assert d2[i].as_py() == dictionary[indices[i]]
 
 
+@pytest.mark.numpy
 def test_dictionary_to_numpy():
     expected = pa.array(
         ["foo", "bar", None, "foo"]
@@ -865,6 +887,7 @@ def test_dictionary_to_numpy():
     )
 
 
+@pytest.mark.numpy
 def test_dictionary_from_boxed_arrays():
     indices = np.repeat([0, 1, 2], 2)
     dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)
@@ -910,6 +933,7 @@ def test_dictionary_indices():
     arr.indices.validate(full=True)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(('list_array_type', 'list_type_factory'),
                          [(pa.ListArray, pa.list_),
                           (pa.LargeListArray, pa.large_list)])
@@ -1052,6 +1076,7 @@ def test_map_from_dict():
     assert tup_arr.equals(dict_arr)
 
 
+@pytest.mark.numpy
 def test_map_from_arrays():
     offsets_arr = np.array([0, 2, 5, 8], dtype='i4')
     offsets = pa.array(offsets_arr, type='int32')
@@ -1472,6 +1497,7 @@ def _check_cast_case(case, *, safe=True, check_array_construction=True):
         assert in_arr.equals(expected)
 
 
+@pytest.mark.numpy
 def test_cast_integers_safe():
     safe_cases = [
         (np.array([0, 1, 2, 3], dtype='i1'), 'int8',
@@ -1558,6 +1584,7 @@ def test_chunked_array_data_warns():
     assert isinstance(res, pa.ChunkedArray)
 
 
+@pytest.mark.numpy
 def test_cast_integers_unsafe():
     # We let NumPy do the unsafe casting.
     # Note that NEP50 in the NumPy spec no longer allows
@@ -1578,6 +1605,7 @@ def test_cast_integers_unsafe():
         _check_cast_case(case, safe=False)
 
 
+@pytest.mark.numpy
 def test_floating_point_truncate_safe():
     safe_cases = [
         (np.array([1.0, 2.0, 3.0], dtype='float32'), 'float32',
@@ -1591,6 +1619,7 @@ def test_floating_point_truncate_safe():
         _check_cast_case(case, safe=True)
 
 
+@pytest.mark.numpy
 def test_floating_point_truncate_unsafe():
     unsafe_cases = [
         (np.array([1.1, 2.2, 3.3], dtype='float32'), 'float32',
@@ -1635,6 +1664,7 @@ def test_decimal_to_int_safe():
         _check_cast_case(case, safe=True)
 
 
+@pytest.mark.numpy
 def test_decimal_to_int_value_out_of_bounds():
     out_of_bounds_cases = [
         (
@@ -1735,6 +1765,7 @@ def test_decimal_to_decimal():
         result = arr.cast(pa.decimal128(5, 2))
 
 
+@pytest.mark.numpy
 def test_safe_cast_nan_to_int_raises():
     arr = pa.array([np.nan, 1.])
 
@@ -1742,6 +1773,7 @@ def test_safe_cast_nan_to_int_raises():
         arr.cast(pa.int64(), safe=True)
 
 
+@pytest.mark.numpy
 def test_cast_signed_to_unsigned():
     safe_cases = [
         (np.array([0, 1, 2, 3], dtype='i1'), pa.uint8(),
@@ -1992,6 +2024,7 @@ def test_dictionary_decode():
         assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_cast_time32_to_int():
     arr = pa.array(np.array([0, 1, 2], dtype='int32'),
                    type=pa.time32('s'))
@@ -2001,6 +2034,7 @@ def test_cast_time32_to_int():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_cast_time64_to_int():
     arr = pa.array(np.array([0, 1, 2], dtype='int64'),
                    type=pa.time64('us'))
@@ -2010,6 +2044,7 @@ def test_cast_time64_to_int():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_cast_timestamp_to_int():
     arr = pa.array(np.array([0, 1, 2], dtype='int64'),
                    type=pa.timestamp('us'))
@@ -2035,6 +2070,7 @@ def test_cast_date32_to_int():
     assert result2.equals(arr)
 
 
+@pytest.mark.numpy
 def test_cast_duration_to_int():
     arr = pa.array(np.array([0, 1, 2], dtype='int64'),
                    type=pa.duration('us'))
@@ -2044,6 +2080,7 @@ def test_cast_duration_to_int():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_cast_binary_to_utf8():
     binary_arr = pa.array([b'foo', b'bar', b'baz'], type=pa.binary())
     utf8_arr = binary_arr.cast(pa.utf8())
@@ -2064,6 +2101,7 @@ def test_cast_binary_to_utf8():
     assert casted.null_count == 1
 
 
+@pytest.mark.numpy
 def test_cast_date64_to_int():
     arr = pa.array(np.array([0, 1, 2], dtype='int64'),
                    type=pa.date64())
@@ -2146,6 +2184,7 @@ def test_array_pickle_dictionary(pickle_module):
         assert array.equals(result)
 
 
+@pytest.mark.numpy
 @h.settings(suppress_health_check=(h.HealthCheck.too_slow,))
 @h.given(
     past.arrays(
@@ -2177,9 +2216,9 @@ def test_array_pickle_protocol5(data, typ, pickle_module):
         assert result_addresses == addresses
 
 
-@pytest.mark.parametrize(
-    'narr',
-    [
+@pytest.mark.numpy
+def test_to_numpy_roundtrip():
+    for narr in [
         np.arange(10, dtype=np.int64),
         np.arange(10, dtype=np.int32),
         np.arange(10, dtype=np.int16),
@@ -2191,23 +2230,23 @@ def test_array_pickle_protocol5(data, typ, pickle_module):
         np.arange(10, dtype=np.float64),
         np.arange(10, dtype=np.float32),
         np.arange(10, dtype=np.float16),
-    ]
-)
-def test_to_numpy_roundtrip(narr):
-    arr = pa.array(narr)
-    assert narr.dtype == arr.to_numpy().dtype
-    np.testing.assert_array_equal(narr, arr.to_numpy())
-    np.testing.assert_array_equal(narr[:6], arr[:6].to_numpy())
-    np.testing.assert_array_equal(narr[2:], arr[2:].to_numpy())
-    np.testing.assert_array_equal(narr[2:6], arr[2:6].to_numpy())
+    ]:
+        arr = pa.array(narr)
+        assert narr.dtype == arr.to_numpy().dtype
+        np.testing.assert_array_equal(narr, arr.to_numpy())
+        np.testing.assert_array_equal(narr[:6], arr[:6].to_numpy())
+        np.testing.assert_array_equal(narr[2:], arr[2:].to_numpy())
+        np.testing.assert_array_equal(narr[2:6], arr[2:6].to_numpy())
 
 
+@pytest.mark.numpy
 def test_array_uint64_from_py_over_range():
     arr = pa.array([2 ** 63], type=pa.uint64())
     expected = pa.array(np.array([2 ** 63], dtype='u8'))
     assert arr.equals(expected)
 
 
+@pytest.mark.numpy
 def test_array_conversions_no_sentinel_values():
     arr = np.array([1, 2, 3, 4], dtype='int8')
     refcount = sys.getrefcount(arr)
@@ -2249,6 +2288,7 @@ def test_time32_time64_from_integer():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_binary_string_pandas_null_sentinels():
     # ARROW-6227
     def _check_case(ty):
@@ -2259,6 +2299,7 @@ def _check_case(ty):
     _check_case('utf8')
 
 
+@pytest.mark.numpy
 def test_pandas_null_sentinels_raise_error():
     # ARROW-6227
     cases = [
@@ -2299,6 +2340,7 @@ def test_pandas_null_sentinels_index():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_array_roundtrip_from_numpy_datetimeD():
     arr = np.array([None, datetime.date(2017, 4, 4)], dtype='datetime64[D]')
 
@@ -2319,6 +2361,7 @@ def test_array_from_naive_datetimes():
     assert arr.type == pa.timestamp('us', tz=None)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(('dtype', 'type'), [
     ('datetime64[s]', pa.timestamp('s')),
     ('datetime64[ms]', pa.timestamp('ms')),
@@ -2342,6 +2385,7 @@ def test_array_from_numpy_datetime(dtype, type):
     assert arr.equals(expected)
 
 
+@pytest.mark.numpy
 def test_array_from_different_numpy_datetime_units_raises():
     data = [
         None,
@@ -2356,6 +2400,7 @@ def test_array_from_different_numpy_datetime_units_raises():
         pa.array(data)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('unit', ['ns', 'us', 'ms', 's'])
 def test_array_from_list_of_timestamps(unit):
     n = np.datetime64('NaT', unit)
@@ -2370,6 +2415,7 @@ def test_array_from_list_of_timestamps(unit):
     assert a1[0] == a2[0]
 
 
+@pytest.mark.numpy
 def test_array_from_timestamp_with_generic_unit():
     n = np.datetime64('NaT')
     x = np.datetime64('2017-01-01 01:01:01.111111111')
@@ -2380,6 +2426,7 @@ def test_array_from_timestamp_with_generic_unit():
         pa.array([n, x, y])
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(('dtype', 'type'), [
     ('timedelta64[s]', pa.duration('s')),
     ('timedelta64[ms]', pa.duration('ms')),
@@ -2408,6 +2455,7 @@ def test_array_from_numpy_timedelta(dtype, type):
     assert arr.to_pylist() == data
 
 
+@pytest.mark.numpy
 def test_array_from_numpy_timedelta_incorrect_unit():
     # generic (no unit)
     td = np.timedelta64(1)
@@ -2423,6 +2471,7 @@ def test_array_from_numpy_timedelta_incorrect_unit():
             pa.array(data)
 
 
+@pytest.mark.numpy
 def test_array_from_numpy_ascii():
     arr = np.array(['abcde', 'abc', ''], dtype='|S5')
 
@@ -2567,6 +2616,7 @@ def test_interval_array_from_dateoffset():
     assert list(actual_list[0]) == expected_from_pandas
 
 
+@pytest.mark.numpy
 def test_array_from_numpy_unicode():
     dtypes = ['<U5', '>U5']
 
@@ -2599,12 +2649,14 @@ def test_array_from_numpy_unicode():
     assert arrow_arr.equals(expected)
 
 
+@pytest.mark.numpy
 def test_array_string_from_non_string():
     # ARROW-5682 - when converting to string raise on non string-like dtype
     with pytest.raises(TypeError):
         pa.array(np.array([1, 2, 3]), type=pa.string())
 
 
+@pytest.mark.numpy
 def test_array_string_from_all_null():
     # ARROW-5682
     vals = np.array([None, None], dtype=object)
@@ -2619,6 +2671,7 @@ def test_array_string_from_all_null():
     assert arr.null_count == 2
 
 
+@pytest.mark.numpy
 def test_array_from_masked():
     ma = np.ma.array([1, 2, 3, 4], dtype='int64',
                      mask=[False, False, True, False])
@@ -2630,6 +2683,7 @@ def test_array_from_masked():
         pa.array(ma, mask=np.array([True, False, False, False]))
 
 
+@pytest.mark.numpy
 def test_array_from_shrunken_masked():
     ma = np.ma.array([0], dtype='int64')
     result = pa.array(ma)
@@ -2637,6 +2691,7 @@ def test_array_from_shrunken_masked():
     assert expected.equals(result)
 
 
+@pytest.mark.numpy
 def test_array_from_invalid_dim_raises():
     msg = "only handle 1-dimensional arrays"
     arr2d = np.array([[1, 2, 3], [4, 5, 6]])
@@ -2648,6 +2703,7 @@ def test_array_from_invalid_dim_raises():
         pa.array(arr0d)
 
 
+@pytest.mark.numpy
 def test_array_from_strided_bool():
     # ARROW-6325
     arr = np.ones((3, 2), dtype=bool)
@@ -2659,6 +2715,7 @@ def test_array_from_strided_bool():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_array_from_strided():
     pydata = [
         ([b"ab", b"cd", b"ef"], (pa.binary(), pa.binary(2))),
@@ -2683,6 +2740,7 @@ def test_boolean_true_count_false_count():
     assert arr.false_count == 1000
 
 
+@pytest.mark.numpy
 def test_buffers_primitive():
     a = pa.array([1, 2, None, 4], type=pa.int16())
     buffers = a.buffers()
@@ -2755,6 +2813,7 @@ def test_buffers_nested():
     assert struct.unpack('4xh', values) == (43,)
 
 
+@pytest.mark.numpy
 def test_total_buffer_size():
     a = pa.array(np.array([4, 5, 6], dtype='int64'))
     assert a.nbytes == 8 * 3
@@ -3153,6 +3212,7 @@ def test_nested_dictionary_array():
     assert dict_arr2.to_pylist() == ['a', 'b', 'a', 'b', 'a']
 
 
+@pytest.mark.numpy
 def test_array_from_numpy_str_utf8():
     # ARROW-3890 -- in Python 3, NPY_UNICODE arrays are produced, but in Python
     # 2 they are NPY_STRING (binary), so we must do UTF-8 validation
@@ -3179,6 +3239,7 @@ def test_array_from_numpy_str_utf8():
         pa.array(vec, pa.string(), mask=np.array([False]))
 
 
+@pytest.mark.numpy
 @pytest.mark.slow
 @pytest.mark.large_memory
 def test_numpy_binary_overflow_to_chunked():
@@ -3237,6 +3298,7 @@ def test_list_child_overflow_to_chunked():
     assert len(arr.chunk(1)) == 1
 
 
+@pytest.mark.numpy
 def test_infer_type_masked():
     # ARROW-5208
     ty = pa.infer_type(['foo', 'bar', None, 2],
@@ -3252,6 +3314,7 @@ def test_infer_type_masked():
     assert pa.infer_type([], mask=[]) == pa.null()
 
 
+@pytest.mark.numpy
 def test_array_masked():
     # ARROW-5208
     arr = pa.array([4, None, 4, 3.],
@@ -3264,6 +3327,7 @@ def test_array_masked():
     assert arr.type == pa.int64()
 
 
+@pytest.mark.numpy
 def test_array_supported_masks():
     # ARROW-13883
     arr = pa.array([4, None, 4, 3.],
@@ -3322,6 +3386,7 @@ def test_array_supported_pandas_masks():
     assert arr.to_pylist() == [None, 1]
 
 
+@pytest.mark.numpy
 def test_binary_array_masked():
     # ARROW-12431
     masked_basic = pa.array([b'\x05'], type=pa.binary(1),
@@ -3354,6 +3419,7 @@ def test_binary_array_masked():
     assert ([b'aaa', b'bbb', b'ccc']*10) == arrow_array.to_pylist()
 
 
+@pytest.mark.numpy
 def test_binary_array_strided():
     # Masked
     nparray = np.array([b"ab", b"cd", b"ef"])
@@ -3367,6 +3433,7 @@ def test_binary_array_strided():
     assert [b"ab", b"ef"] == arrow_array.to_pylist()
 
 
+@pytest.mark.numpy
 def test_array_invalid_mask_raises():
     # ARROW-10742
     cases = [
@@ -3400,6 +3467,7 @@ def test_array_from_large_pyints():
         pa.array([int(2 ** 63)])
 
 
+@pytest.mark.numpy
 def test_numpy_array_protocol():
     # test the __array__ method on pyarrow.Array
     arr = pa.array([1, 2, 3])
@@ -3446,6 +3514,7 @@ def test_numpy_array_protocol():
     assert result.dtype == "float64"
 
 
+@pytest.mark.numpy
 def test_array_protocol():
 
     class MyArray:
@@ -3769,6 +3838,7 @@ def test_run_end_encoded_from_buffers():
                                            1, offset, children)
 
 
+@pytest.mark.numpy
 def test_run_end_encoded_from_array_with_type():
     run_ends = [1, 3, 6]
     values = [1, 2, 3]
@@ -3808,6 +3878,7 @@ def test_run_end_encoded_from_array_with_type():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 def test_run_end_encoded_to_numpy():
     arr = [1, 2, 2, 3, 3, 3]
     ree_array = pa.array(arr, pa.run_end_encoded(pa.int32(), pa.int64()))
@@ -4023,6 +4094,7 @@ def test_list_view_slice(list_view_type):
     assert sliced_array[0].as_py() == sliced_array.values[i:j].to_pylist() == [4]
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('numpy_native_dtype', ['u2', 'i4', 'f8'])
 def test_swapped_byte_order_fails(numpy_native_dtype):
     # ARROW-39129
diff --git a/python/pyarrow/tests/test_builder.py b/python/pyarrow/tests/test_builder.py
index abc8a0013df37..9187a19b5fc24 100644
--- a/python/pyarrow/tests/test_builder.py
+++ b/python/pyarrow/tests/test_builder.py
@@ -15,10 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import math
 import weakref
 
-import numpy as np
-
 import pyarrow as pa
 from pyarrow.lib import StringBuilder, StringViewBuilder
 
@@ -35,7 +34,7 @@ def test_string_builder_append():
     sbuilder = StringBuilder()
     sbuilder.append(b"a byte string")
     sbuilder.append("a string")
-    sbuilder.append(np.nan)
+    sbuilder.append(math.nan)
     sbuilder.append(None)
     assert len(sbuilder) == 4
     assert sbuilder.null_count == 2
@@ -50,7 +49,7 @@ def test_string_builder_append():
 
 def test_string_builder_append_values():
     sbuilder = StringBuilder()
-    sbuilder.append_values([np.nan, None, "text", None, "other text"])
+    sbuilder.append_values([math.nan, None, "text", None, "other text"])
     assert sbuilder.null_count == 3
     arr = sbuilder.finish()
     assert arr.null_count == 3
@@ -60,7 +59,7 @@ def test_string_builder_append_values():
 
 def test_string_builder_append_after_finish():
     sbuilder = StringBuilder()
-    sbuilder.append_values([np.nan, None, "text", None, "other text"])
+    sbuilder.append_values([math.nan, None, "text", None, "other text"])
     arr = sbuilder.finish()
     sbuilder.append("No effect")
     expected = [None, None, "text", None, "other text"]
@@ -72,7 +71,7 @@ def test_string_view_builder():
     builder.append(b"a byte string")
     builder.append("a string")
     builder.append("a longer not-inlined string")
-    builder.append(np.nan)
+    builder.append(math.nan)
     builder.append_values([None, "text"])
     assert len(builder) == 6
     assert builder.null_count == 2
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 64fe7f1deb510..d4307cd24f8fc 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -28,7 +28,10 @@
 import sys
 import textwrap
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 try:
     import pandas as pd
@@ -44,27 +47,6 @@
 except ImportError:
     pas = None
 
-all_array_types = [
-    ('bool', [True, False, False, True, True]),
-    ('uint8', np.arange(5)),
-    ('int8', np.arange(5)),
-    ('uint16', np.arange(5)),
-    ('int16', np.arange(5)),
-    ('uint32', np.arange(5)),
-    ('int32', np.arange(5)),
-    ('uint64', np.arange(5, 10)),
-    ('int64', np.arange(5, 10)),
-    ('float', np.arange(0, 0.5, 0.1)),
-    ('double', np.arange(0, 0.5, 0.1)),
-    ('string', ['a', 'b', None, 'ddd', 'ee']),
-    ('binary', [b'a', b'b', b'c', b'ddd', b'ee']),
-    (pa.binary(3), [b'abc', b'bcd', b'cde', b'def', b'efg']),
-    (pa.list_(pa.int8()), [[1, 2], [3, 4], [5, 6], None, [9, 16]]),
-    (pa.large_list(pa.int16()), [[1], [2, 3, 4], [5, 6], None, [9, 16]]),
-    (pa.struct([('a', pa.int8()), ('b', pa.int8())]), [
-        {'a': 1, 'b': 2}, None, {'a': 3, 'b': 4}, None, {'a': 5, 'b': 6}]),
-]
-
 exported_functions = [
     func for (name, func) in sorted(pc.__dict__.items())
     if hasattr(func, '__arrow_compute_function__')]
@@ -87,6 +69,28 @@
 ]
 
 
+all_array_types = [
+    ('bool', [True, False, False, True, True]),
+    ('uint8', range(5)),
+    ('int8', range(5)),
+    ('uint16', range(5)),
+    ('int16', range(5)),
+    ('uint32', range(5)),
+    ('int32', range(5)),
+    ('uint64', range(5, 10)),
+    ('int64', range(5, 10)),
+    ('float', [0, 0.1, 0.2, 0.3, 0.4]),
+    ('double', [0, 0.1, 0.2, 0.3, 0.4]),
+    ('string', ['a', 'b', None, 'ddd', 'ee']),
+    ('binary', [b'a', b'b', b'c', b'ddd', b'ee']),
+    (pa.binary(3), [b'abc', b'bcd', b'cde', b'def', b'efg']),
+    (pa.list_(pa.int8()), [[1, 2], [3, 4], [5, 6], None, [9, 16]]),
+    (pa.large_list(pa.int16()), [[1], [2, 3, 4], [5, 6], None, [9, 16]]),
+    (pa.struct([('a', pa.int8()), ('b', pa.int8())]), [
+        {'a': 1, 'b': 2}, None, {'a': 3, 'b': 4}, None, {'a': 5, 'b': 6}]),
+]
+
+
 def test_exported_functions():
     # Check that all exported concrete functions can be called with
     # the right number of arguments.
@@ -263,6 +267,7 @@ def test_get_function_hash_aggregate():
                         pc.HashAggregateKernel, 1)
 
 
+@pytest.mark.numpy
 def test_call_function_with_memory_pool():
     arr = pa.array(["foo", "bar", "baz"])
     indices = np.array([2, 2, 1])
@@ -1172,7 +1177,7 @@ def test_take_on_chunked_array():
         ]
     ])
 
-    indices = np.array([0, 5, 1, 6, 9, 2])
+    indices = pa.array([0, 5, 1, 6, 9, 2])
     result = arr.take(indices)
     expected = pa.chunked_array([["a", "f", "b", "g", "j", "c"]])
     assert result.equals(expected)
@@ -1304,12 +1309,6 @@ def test_filter(ty, values):
     result.validate()
     assert result.equals(pa.array([values[0], values[3], None], type=ty))
 
-    # same test with different array type
-    mask = np.array([True, False, False, True, None])
-    result = arr.filter(mask, null_selection_behavior='drop')
-    result.validate()
-    assert result.equals(pa.array([values[0], values[3]], type=ty))
-
     # non-boolean dtype
     mask = pa.array([0, 1, 0, 1, 0])
     with pytest.raises(NotImplementedError):
@@ -1321,6 +1320,17 @@ def test_filter(ty, values):
         arr.filter(mask)
 
 
+@pytest.mark.numpy
+@pytest.mark.parametrize(('ty', 'values'), all_array_types)
+def test_filter_numpy_array_mask(ty, values):
+    arr = pa.array(values, type=ty)
+    # same check as test_filter, but passing a NumPy array as the mask
+    mask = np.array([True, False, False, True, None])
+    result = arr.filter(mask, null_selection_behavior='drop')
+    result.validate()
+    assert result.equals(pa.array([values[0], values[3]], type=ty))
+
+
 def test_filter_chunked_array():
     arr = pa.chunked_array([["a", None], ["c", "d", "e"]])
     expected_drop = pa.chunked_array([["a"], ["e"]])
@@ -1586,9 +1596,11 @@ def test_round_to_integer(ty):
     for round_mode, expected in rmode_and_expected.items():
         options = RoundOptions(round_mode=round_mode)
         result = round(values, options=options)
-        np.testing.assert_array_equal(result, pa.array(expected))
+        expected_array = pa.array(expected, type=pa.float64())
+        assert expected_array.equals(result)
 
 
+@pytest.mark.numpy
 def test_round():
     values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None]
     ndigits_and_expected = {
@@ -1607,6 +1619,7 @@ def test_round():
         assert pc.round(values, ndigits, "half_towards_infinity") == result
 
 
+@pytest.mark.numpy
 def test_round_to_multiple():
     values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None]
     multiple_and_expected = {
@@ -1670,7 +1683,7 @@ def test_is_null():
     expected = pa.chunked_array([[True, True], [True, False]])
     assert result.equals(expected)
 
-    arr = pa.array([1, 2, 3, None, np.nan])
+    arr = pa.array([1, 2, 3, None, float("nan")])
     result = arr.is_null()
     expected = pa.array([False, False, False, True, False])
     assert result.equals(expected)
@@ -1681,7 +1694,7 @@ def test_is_null():
 
 
 def test_is_nan():
-    arr = pa.array([1, 2, 3, None, np.nan])
+    arr = pa.array([1, 2, 3, None, float("nan")])
     result = arr.is_nan()
     expected = pa.array([False, False, False, None, True])
     assert result.equals(expected)
@@ -1986,6 +1999,7 @@ def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx,
 
 
 # Cannot test float32 as case generators above assume float64
+@pytest.mark.numpy
 @pytest.mark.parametrize('float_ty', [pa.float64()], ids=str)
 @pytest.mark.parametrize('decimal_ty', decimal_type_traits,
                          ids=lambda v: v.name)
@@ -2003,6 +2017,7 @@ def test_cast_float_to_decimal(float_ty, decimal_ty, case_generator):
                 ctx, decimal_ty.max_precision)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('float_ty', [pa.float32(), pa.float64()], ids=str)
 @pytest.mark.parametrize('decimal_traits', decimal_type_traits,
                          ids=lambda v: v.name)
@@ -2908,6 +2923,7 @@ def test_min_max_element_wise():
     assert result == pa.array([1, 2, None])
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('start', (1.25, 10.5, -10.5))
 @pytest.mark.parametrize('skip_nulls', (True, False))
 def test_cumulative_sum(start, skip_nulls):
@@ -2962,6 +2978,7 @@ def test_cumulative_sum(start, skip_nulls):
             pc.cumulative_sum([1, 2, 3], start=strt)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('start', (1.25, 10.5, -10.5))
 @pytest.mark.parametrize('skip_nulls', (True, False))
 def test_cumulative_prod(start, skip_nulls):
@@ -3016,6 +3033,7 @@ def test_cumulative_prod(start, skip_nulls):
             pc.cumulative_prod([1, 2, 3], start=strt)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('start', (0.5, 3.5, 6.5))
 @pytest.mark.parametrize('skip_nulls', (True, False))
 def test_cumulative_max(start, skip_nulls):
@@ -3073,6 +3091,7 @@ def test_cumulative_max(start, skip_nulls):
             pc.cumulative_max([1, 2, 3], start=strt)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('start', (0.5, 3.5, 6.5))
 @pytest.mark.parametrize('skip_nulls', (True, False))
 def test_cumulative_min(start, skip_nulls):
@@ -3407,6 +3426,7 @@ def create_sample_expressions():
 # Tests the Arrow-specific serialization mechanism
 
 
+@pytest.mark.numpy
 def test_expression_serialization_arrow(pickle_module):
     for expr in create_sample_expressions()["all"]:
         assert isinstance(expr, pc.Expression)
@@ -3414,6 +3434,7 @@ def test_expression_serialization_arrow(pickle_module):
         assert expr.equals(restored)
 
 
+@pytest.mark.numpy
 @pytest.mark.substrait
 def test_expression_serialization_substrait():
 
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 6140163a8ee8c..c3589877e6423 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -23,8 +23,11 @@
 import re
 
 import hypothesis as h
-import numpy as np
 import pytest
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 from pyarrow.pandas_compat import _pandas_api  # noqa
 import pyarrow as pa
@@ -32,17 +35,17 @@
 
 
 int_type_pairs = [
-    (np.int8, pa.int8()),
-    (np.int16, pa.int16()),
-    (np.int32, pa.int32()),
-    (np.int64, pa.int64()),
-    (np.uint8, pa.uint8()),
-    (np.uint16, pa.uint16()),
-    (np.uint32, pa.uint32()),
-    (np.uint64, pa.uint64())]
+    ("int8", pa.int8()),
+    ("int16", pa.int16()),
+    ("int32", pa.int32()),
+    ("int64", pa.int64()),
+    ("uint8", pa.uint8()),
+    ("uint16", pa.uint16()),
+    ("uint32", pa.uint32()),
+    ("uint64", pa.uint64())]
 
 
-np_int_types, pa_int_types = zip(*int_type_pairs)
+np_str_int_types, pa_int_types = zip(*int_type_pairs)
 
 
 class StrangeIterable:
@@ -174,7 +177,9 @@ def _as_set(xs):
     return set(xs)
 
 
-SEQUENCE_TYPES = [_as_list, _as_tuple, _as_numpy_array]
+SEQUENCE_TYPES = [_as_list, _as_tuple]
+if np is not None:
+    SEQUENCE_TYPES.append(_as_numpy_array)
 ITERABLE_TYPES = [_as_set, _as_dict_values] + SEQUENCE_TYPES
 COLLECTIONS_TYPES = [_as_deque] + ITERABLE_TYPES
 
@@ -217,6 +222,7 @@ def test_sequence_boolean(seq):
     assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 def test_sequence_numpy_boolean(seq):
     expected = [np.bool_(True), None, np.bool_(False), None]
@@ -225,6 +231,7 @@ def test_sequence_numpy_boolean(seq):
     assert arr.to_pylist() == [True, None, False, None]
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 def test_sequence_mixed_numpy_python_bools(seq):
     values = np.array([True, False])
@@ -278,11 +285,14 @@ def test_list_with_non_list(seq):
 
 
 @parametrize_with_sequence_types
+@pytest.mark.parametrize(
+    "inner_seq", SEQUENCE_TYPES
+)
 @pytest.mark.parametrize("factory", [
     pa.list_, pa.large_list, pa.list_view, pa.large_list_view])
-def test_nested_arrays(seq, factory):
-    arr = pa.array(seq([np.array([], dtype=np.int64),
-                        np.array([1, 2], dtype=np.int64), None]),
+def test_nested_arrays(seq, inner_seq, factory):
+    arr = pa.array(seq([inner_seq([]),
+                        inner_seq([1, 2]), None]),
                    type=factory(pa.int64()))
     assert len(arr) == 3
     assert arr.null_count == 1
@@ -290,6 +300,7 @@ def test_nested_arrays(seq, factory):
     assert arr.to_pylist() == [[], [1, 2], None]
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 def test_nested_fixed_size_list(seq):
     # sequence of lists
@@ -334,10 +345,12 @@ def test_sequence_all_none(seq):
     assert arr.to_pylist() == [None, None]
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 @pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
 def test_sequence_integer(seq, np_scalar_pa_type):
-    np_scalar, pa_type = np_scalar_pa_type
+    np_str_scalar, pa_type = np_scalar_pa_type
+    np_scalar = getattr(np, np_str_scalar)
     expected = [1, None, 3, None,
                 np.iinfo(np_scalar).min, np.iinfo(np_scalar).max]
     arr = pa.array(seq(expected), type=pa_type)
@@ -347,12 +360,12 @@ def test_sequence_integer(seq, np_scalar_pa_type):
     assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_collections_types
-@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
-def test_sequence_integer_np_nan(seq, np_scalar_pa_type):
+@pytest.mark.parametrize("pa_type", pa_int_types)
+def test_sequence_integer_np_nan(seq, pa_type):
     # ARROW-2806: numpy.nan is a double value and thus should produce
     # a double array.
-    _, pa_type = np_scalar_pa_type
     with pytest.raises(ValueError):
         pa.array(seq([np.nan]), type=pa_type, from_pandas=False)
 
@@ -364,12 +377,12 @@ def test_sequence_integer_np_nan(seq, np_scalar_pa_type):
     assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
-@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
-def test_sequence_integer_nested_np_nan(seq, np_scalar_pa_type):
+@pytest.mark.parametrize("pa_type", pa_int_types)
+def test_sequence_integer_nested_np_nan(seq, pa_type):
     # ARROW-2806: numpy.nan is a double value and thus should produce
     # a double array.
-    _, pa_type = np_scalar_pa_type
     with pytest.raises(ValueError):
         pa.array(seq([[np.nan]]), type=pa.list_(pa_type), from_pandas=False)
 
@@ -391,10 +404,12 @@ def test_sequence_integer_inferred(seq):
     assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 @pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
 def test_sequence_numpy_integer(seq, np_scalar_pa_type):
-    np_scalar, pa_type = np_scalar_pa_type
+    np_str_scalar, pa_type = np_scalar_pa_type
+    np_scalar = getattr(np, np_str_scalar)
     expected = [np_scalar(1), None, np_scalar(3), None,
                 np_scalar(np.iinfo(np_scalar).min),
                 np_scalar(np.iinfo(np_scalar).max)]
@@ -405,10 +420,12 @@ def test_sequence_numpy_integer(seq, np_scalar_pa_type):
     assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 @pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
 def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type):
-    np_scalar, pa_type = np_scalar_pa_type
+    np_str_scalar, pa_type = np_scalar_pa_type
+    np_scalar = getattr(np, np_str_scalar)
     expected = [np_scalar(1), None, np_scalar(3), None]
     expected += [np_scalar(np.iinfo(np_scalar).min),
                  np_scalar(np.iinfo(np_scalar).max)]
@@ -434,6 +451,7 @@ def test_broken_integers(seq):
         pa.array(seq(data), type=pa.int64())
 
 
+@pytest.mark.numpy
 def test_numpy_scalars_mixed_type():
     # ARROW-4324
     data = [np.int32(10), np.float32(0.5)]
@@ -448,6 +466,7 @@ def test_numpy_scalars_mixed_type():
     assert arr.equals(expected)
 
 
+@pytest.mark.numpy
 @pytest.mark.xfail(reason="Type inference for uint64 not implemented",
                    raises=OverflowError)
 def test_uint64_max_convert():
@@ -491,7 +510,7 @@ def test_integer_from_string_error(seq, typ):
 
 def test_convert_with_mask():
     data = [1, 2, 3, 4, 5]
-    mask = np.array([False, True, False, False, True])
+    mask = [False, True, False, False, True]
 
     result = pa.array(data, mask=mask)
     expected = pa.array([1, None, 3, 4, None])
@@ -559,6 +578,7 @@ def test_double_integer_coerce_representable_range():
         pa.array(invalid_values2)
 
 
+@pytest.mark.numpy
 def test_float32_integer_coerce_representable_range():
     f32 = np.float32
     valid_values = [f32(1.5), 1 << 24, -(1 << 24)]
@@ -587,14 +607,16 @@ def test_mixed_sequence_errors():
         pa.array([1.5, 'foo'])
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
-@pytest.mark.parametrize("np_scalar,pa_type", [
-    (np.float16, pa.float16()),
-    (np.float32, pa.float32()),
-    (np.float64, pa.float64())
+@pytest.mark.parametrize("np_str_scalar,pa_type", [
+    ("float16", pa.float16()),
+    ("float32", pa.float32()),
+    ("float64", pa.float64())
 ])
 @pytest.mark.parametrize("from_pandas", [True, False])
-def test_sequence_numpy_double(seq, np_scalar, pa_type, from_pandas):
+def test_sequence_numpy_double(seq, np_str_scalar, pa_type, from_pandas):
+    np_scalar = getattr(np, np_str_scalar)
     data = [np_scalar(1.5), np_scalar(1), None, np_scalar(2.5), None, np.nan]
     arr = pa.array(seq(data), from_pandas=from_pandas)
     assert len(arr) == 6
@@ -616,27 +638,29 @@ def test_sequence_numpy_double(seq, np_scalar, pa_type, from_pandas):
         assert np.isnan(arr.to_pylist()[5])
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize("from_pandas", [True, False])
-@pytest.mark.parametrize("inner_seq", [np.array, list])
-def test_ndarray_nested_numpy_double(from_pandas, inner_seq):
+def test_ndarray_nested_numpy_double(from_pandas):
     # ARROW-2806
-    data = np.array([
-        inner_seq([1., 2.]),
-        inner_seq([1., 2., 3.]),
-        inner_seq([np.nan]),
-        None
-    ], dtype=object)
-    arr = pa.array(data, from_pandas=from_pandas)
-    assert len(arr) == 4
-    assert arr.null_count == 1
-    assert arr.type == pa.list_(pa.float64())
-    if from_pandas:
-        assert arr.to_pylist() == [[1.0, 2.0], [1.0, 2.0, 3.0], [None], None]
-    else:
-        np.testing.assert_equal(arr.to_pylist(),
-                                [[1., 2.], [1., 2., 3.], [np.nan], None])
+    for inner_seq in (np.array, list):
+        data = np.array([
+            inner_seq([1., 2.]),
+            inner_seq([1., 2., 3.]),
+            inner_seq([np.nan]),
+            None
+        ], dtype=object)
+        arr = pa.array(data, from_pandas=from_pandas)
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == pa.list_(pa.float64())
+        if from_pandas:
+            assert arr.to_pylist() == [[1.0, 2.0], [1.0, 2.0, 3.0], [None], None]
+        else:
+            np.testing.assert_equal(arr.to_pylist(),
+                                    [[1., 2.], [1., 2., 3.], [np.nan], None])
 
 
+@pytest.mark.numpy
 def test_nested_ndarray_in_object_array():
     # ARROW-4350
     arr = np.empty(2, dtype=object)
@@ -664,6 +688,7 @@ def test_nested_ndarray_in_object_array():
     assert result.to_pylist() == [[[1], [2]], [[1], [2]]]
 
 
+@pytest.mark.numpy
 @pytest.mark.xfail(reason=("Type inference for multidimensional ndarray "
                            "not yet implemented"),
                    raises=AssertionError)
@@ -682,6 +707,7 @@ def test_multidimensional_ndarray_as_nested_list():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(('data', 'value_type'), [
     ([True, False], pa.bool_()),
     ([None, None], pa.null()),
@@ -711,6 +737,7 @@ def test_list_array_from_object_ndarray(data, value_type):
     assert arr.to_pylist() == [data]
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(('data', 'value_type'), [
     ([[1, 2], [3]], pa.list_(pa.int64())),
     ([[1, 2], [3, 4]], pa.list_(pa.int64(), 2)),
@@ -730,13 +757,14 @@ def test_array_ignore_nan_from_pandas():
     # See ARROW-4324, this reverts logic that was introduced in
     # ARROW-2240
     with pytest.raises(ValueError):
-        pa.array([np.nan, 'str'])
+        pa.array([float("nan"), 'str'])
 
-    arr = pa.array([np.nan, 'str'], from_pandas=True)
+    arr = pa.array([float("nan"), 'str'], from_pandas=True)
     expected = pa.array([None, 'str'])
     assert arr.equals(expected)
 
 
+@pytest.mark.numpy
 def test_nested_ndarray_different_dtypes():
     data = [
         np.array([1, 2, 3], dtype='int64'),
@@ -1238,6 +1266,7 @@ def test_sequence_timestamp_out_of_bounds_nanosecond():
     assert arr.to_pylist()[0] == datetime.datetime(2262, 4, 12)
 
 
+@pytest.mark.numpy
 def test_sequence_numpy_timestamp():
     data = [
         np.datetime64(datetime.datetime(2007, 7, 13, 1, 23, 34, 123456)),
@@ -1407,14 +1436,25 @@ class CustomClass():
             pa.array([1, CustomClass()], type=ty)
 
 
-@pytest.mark.parametrize('np_scalar', [True, False])
-def test_sequence_duration(np_scalar):
+def test_sequence_duration():
     td1 = datetime.timedelta(2, 3601, 1)
     td2 = datetime.timedelta(1, 100, 1000)
-    if np_scalar:
-        data = [np.timedelta64(td1), None, np.timedelta64(td2)]
-    else:
-        data = [td1, None, td2]
+    data = [td1, None, td2]
+
+    arr = pa.array(data)
+    assert len(arr) == 3
+    assert arr.type == pa.duration('us')
+    assert arr.null_count == 1
+    assert arr[0].as_py() == td1
+    assert arr[1].as_py() is None
+    assert arr[2].as_py() == td2
+
+
+@pytest.mark.numpy
+def test_sequence_duration_np_scalar():
+    td1 = datetime.timedelta(2, 3601, 1)
+    td2 = datetime.timedelta(1, 100, 1000)
+    data = [np.timedelta64(td1), None, np.timedelta64(td2)]
 
     arr = pa.array(data)
     assert len(arr) == 3
@@ -1480,6 +1520,7 @@ def test_sequence_duration_nested_lists_with_explicit_type(factory):
     assert arr.to_pylist() == data
 
 
+@pytest.mark.numpy
 def test_sequence_duration_nested_lists_numpy():
     td1 = datetime.timedelta(1, 1, 1000)
     td2 = datetime.timedelta(1, 100)
@@ -1769,6 +1810,7 @@ def test_struct_from_dicts_bytes_keys():
     ]
 
 
+@pytest.mark.numpy
 def test_struct_from_tuples():
     ty = pa.struct([pa.field('a', pa.int32()),
                     pa.field('b', pa.string()),
@@ -1915,6 +1957,7 @@ def test_struct_from_mixed_sequence():
         pa.array(data, type=ty)
 
 
+@pytest.mark.numpy
 def test_struct_from_dicts_inference():
     expected_type = pa.struct([pa.field('a', pa.int64()),
                                pa.field('b', pa.string()),
@@ -1992,7 +2035,7 @@ def test_structarray_from_arrays_coerce():
 
 
 def test_decimal_array_with_none_and_nan():
-    values = [decimal.Decimal('1.234'), None, np.nan, decimal.Decimal('nan')]
+    values = [decimal.Decimal('1.234'), None, float("nan"), decimal.Decimal('nan')]
 
     with pytest.raises(TypeError):
         # ARROW-6227: Without from_pandas=True, NaN is considered a float
@@ -2215,6 +2258,7 @@ def test_roundtrip_nanosecond_resolution_pandas_temporal_objects():
     ]
 
 
+@pytest.mark.numpy
 @h.given(past.all_arrays)
 def test_array_to_pylist_roundtrip(arr):
     seq = arr.to_pylist()
@@ -2498,6 +2542,7 @@ def test_array_accepts_pyarrow_scalar(seq, data, scalar_data, value_type):
     assert expect.equals(result)
 
 
+@pytest.mark.numpy
 @parametrize_with_collections_types
 def test_array_accepts_pyarrow_scalar_errors(seq):
     sequence = seq([pa.scalar(1), pa.scalar("a"), pa.scalar(3.0)])
diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py
index 83800b77f894b..7508d8f0b9816 100644
--- a/python/pyarrow/tests/test_cpp_internals.py
+++ b/python/pyarrow/tests/test_cpp_internals.py
@@ -18,6 +18,8 @@
 import os.path
 from os.path import join as pjoin
 
+import pytest
+
 from pyarrow._pyarrow_cpp_tests import get_cpp_tests
 
 
@@ -26,10 +28,16 @@ def inject_cpp_tests(ns):
     Inject C++ tests as Python functions into namespace `ns` (a dict).
     """
     for case in get_cpp_tests():
+
         def wrapper(case=case):
             case()
         wrapper.__name__ = wrapper.__qualname__ = case.name
         wrapper.__module__ = ns['__name__']
+        # Mark the wrapper as numpy/pandas when the C++ test name mentions it
+        if 'numpy' in case.name:
+            wrapper = pytest.mark.numpy(wrapper)
+        elif 'pandas' in case.name:
+            wrapper = pytest.mark.pandas(wrapper)
         ns[case.name] = wrapper
 
 
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 112129d9602ed..dcf96f68c4da7 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -24,6 +24,7 @@
 import io
 import itertools
 import os
+import random
 import select
 import shutil
 import signal
@@ -36,8 +37,6 @@
 
 import pytest
 
-import numpy as np
-
 import pyarrow as pa
 from pyarrow.csv import (
     open_csv, read_csv, ReadOptions, ParseOptions, ConvertOptions, ISO8601,
@@ -54,18 +53,32 @@ def generate_col_names():
             yield first + second
 
 
+def split_rows(arr, num_cols, num_rows):
+    # Split a num_cols x num_rows array into rows
+    for i in range(0, num_rows * num_cols, num_cols):
+        yield arr[i:i + num_cols]
+
+
+def split_columns(arr, num_cols, num_rows):
+    # Split a num_cols x num_rows array into columns
+    for i in range(0, num_cols):
+        yield arr[i::num_cols]
+
+
 def make_random_csv(num_cols=2, num_rows=10, linesep='\r\n', write_names=True):
-    arr = np.random.RandomState(42).randint(0, 1000, size=(num_cols, num_rows))
+    rnd = random.Random(42)
+    arr = [rnd.randint(0, 1000) for _ in range(num_cols * num_rows)]
     csv = io.StringIO()
     col_names = list(itertools.islice(generate_col_names(), num_cols))
     if write_names:
         csv.write(",".join(col_names))
         csv.write(linesep)
-    for row in arr.T:
+    for row in split_rows(arr, num_cols, num_rows):
         csv.write(",".join(map(str, row)))
         csv.write(linesep)
     csv = csv.getvalue().encode()
-    columns = [pa.array(a, type=pa.int64()) for a in arr]
+    columns = [pa.array(row, type=pa.int64())
+               for row in split_columns(arr, num_cols, num_rows)]
     expected = pa.Table.from_arrays(columns, col_names)
     return csv, expected
 
@@ -127,6 +140,25 @@ def __ne__(self, other):
                 other.result != self.result)
 
 
+def test_split_rows_and_columns_utility():
+    num_cols = 5
+    num_rows = 2
+    arr = [x for x in range(1, 11)]
+    rows = list(split_rows(arr, num_cols, num_rows))
+    assert rows == [
+        [1, 2, 3, 4, 5],
+        [6, 7, 8, 9, 10]
+    ]
+    columns = list(split_columns(arr, num_cols, num_rows))
+    assert columns == [
+        [1, 6],
+        [2, 7],
+        [3, 8],
+        [4, 9],
+        [5, 10]
+    ]
+
+
 def test_read_options(pickle_module):
     cls = ReadOptions
     opts = cls()
@@ -520,6 +552,7 @@ def test_skip_rows_after_names(self):
             assert (values[opts.skip_rows + opts.skip_rows_after_names:] ==
                     table_dict[name])
 
+    @pytest.mark.numpy
     def test_row_number_offset_in_errors(self):
         # Row numbers are only correctly counted in serial reads
         def format_msg(msg_format, row, *args):
@@ -1802,6 +1835,7 @@ def test_header_skip_rows(self):
         with pytest.raises(StopIteration):
             assert reader.read_next_batch()
 
+    @pytest.mark.numpy
     def test_skip_rows_after_names(self):
         super().test_skip_rows_after_names()
 
diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py
index d55be651b1571..a71fa036503d7 100644
--- a/python/pyarrow/tests/test_cuda.py
+++ b/python/pyarrow/tests/test_cuda.py
@@ -26,7 +26,10 @@
 import pytest
 
 import pyarrow as pa
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytestmark = pytest.mark.numpy
 
 
 cuda = pytest.importorskip("pyarrow.cuda")
diff --git a/python/pyarrow/tests/test_cuda_numba_interop.py b/python/pyarrow/tests/test_cuda_numba_interop.py
index ff1722d278d5e..876f3c7f761cf 100644
--- a/python/pyarrow/tests/test_cuda_numba_interop.py
+++ b/python/pyarrow/tests/test_cuda_numba_interop.py
@@ -17,7 +17,10 @@
 
 import pytest
 import pyarrow as pa
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytestmark = pytest.mark.numpy
 
 dtypes = ['uint8', 'int16', 'float32']
 cuda = pytest.importorskip("pyarrow.cuda")
diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py
index 0eeae5d65f7d5..937d927f831b0 100644
--- a/python/pyarrow/tests/test_cython.py
+++ b/python/pyarrow/tests/test_cython.py
@@ -80,6 +80,9 @@ def check_cython_example_module(mod):
         mod.cast_scalar(scal, pa.list_(pa.int64()))
 
 
+# NumPy is still a required build dependency. It is present in our
+# headers and is required to build for the cython tests.
+@pytest.mark.numpy
 @pytest.mark.cython
 def test_cython_api(tmpdir):
     """
@@ -162,6 +165,7 @@ def test_cython_api(tmpdir):
                               env=subprocess_env)
 
 
+@pytest.mark.numpy
 @pytest.mark.cython
 def test_visit_strings(tmpdir):
     with tmpdir.as_cwd():
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 3b0284bcb74a6..276cd2e78db37 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -20,6 +20,7 @@
 import os
 import pathlib
 import posixpath
+import random
 import sys
 import tempfile
 import textwrap
@@ -28,7 +29,10 @@
 from shutil import copytree
 from urllib.parse import quote
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
@@ -684,8 +688,8 @@ def test_partitioning():
 
     # test partitioning roundtrip
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))],
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a'] * 10 + ['b'] * 10)],
         names=["f1", "f2", "part"]
     )
     partitioning_schema = pa.schema([("part", pa.string())])
@@ -2494,7 +2498,7 @@ def _create_partitioned_dataset(basedir):
         pq.write_table(table.slice(3*i, 3), part / "test.parquet")
 
     full_table = table.append_column(
-        "part", pa.array(np.repeat([0, 1, 2], 3), type=pa.int32()))
+        "part", pa.array([0] * 3 + [1] * 3 + [2] * 3, type=pa.int32()))
 
     return full_table, path
 
@@ -2532,7 +2536,7 @@ def test_open_dataset_partitioned_directory(tempdir, dataset_reader, pickle_modu
 
     result = dataset.to_table()
     expected = table.append_column(
-        "part", pa.array(np.repeat([0, 1, 2], 3), type=pa.int8()))
+        "part", pa.array([0] * 3 + [1] * 3 + [2] * 3, type=pa.int8()))
     assert result.equals(expected)
 
 
@@ -3567,7 +3571,7 @@ def _create_parquet_dataset_simple(root_path):
     metadata_collector = []
 
     for i in range(4):
-        table = pa.table({'f1': [i] * 10, 'f2': np.random.randn(10)})
+        table = pa.table({'f1': [i] * 10, 'f2': [random.random() for _ in range(10)]})
         pq.write_to_dataset(
             table, str(root_path), metadata_collector=metadata_collector
         )
@@ -4255,7 +4259,7 @@ def compare_tables_ignoring_order(t1, t2):
 
 
 def _generate_random_int_array(size=4, min=1, max=10):
-    return np.random.randint(min, max, size)
+    return [random.randint(min, max) for _ in range(size)]
 
 
 def _generate_data_and_columns(num_of_columns, num_of_records):
@@ -4513,8 +4517,8 @@ def file_visitor(written_file):
 
 def test_write_table(tempdir):
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a'] * 10 + ['b'] * 10)
     ], names=["f1", "f2", "part"])
 
     base_dir = tempdir / 'single'
@@ -4560,8 +4564,8 @@ def file_visitor(written_file):
 
 def test_write_table_multiple_fragments(tempdir):
     table = pa.table([
-        pa.array(range(10)), pa.array(np.random.randn(10)),
-        pa.array(np.repeat(['a', 'b'], 5))
+        pa.array(range(10)), pa.array(random.random() for _ in range(10)),
+        pa.array(['a'] * 5 + ['b'] * 5)
     ], names=["f1", "f2", "part"])
     table = pa.concat_tables([table]*2)
 
@@ -4596,8 +4600,8 @@ def test_write_table_multiple_fragments(tempdir):
 
 def test_write_iterable(tempdir):
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a'] * 10 + ['b'] * 10)
     ], names=["f1", "f2", "part"])
 
     base_dir = tempdir / 'inmemory_iterable'
@@ -4618,8 +4622,8 @@ def test_write_iterable(tempdir):
 
 def test_write_scanner(tempdir, dataset_reader):
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a'] * 10 + ['b'] * 10)
     ], names=["f1", "f2", "part"])
     dataset = ds.dataset(table)
 
@@ -4647,7 +4651,7 @@ def test_write_table_partitioned_dict(tempdir):
     # specifying the dictionary values explicitly
     table = pa.table([
         pa.array(range(20)),
-        pa.array(np.repeat(['a', 'b'], 10)).dictionary_encode(),
+        pa.array(['a'] * 10 + ['b'] * 10).dictionary_encode(),
     ], names=['col', 'part'])
 
     partitioning = ds.partitioning(table.select(["part"]).schema)
@@ -4666,6 +4670,7 @@ def test_write_table_partitioned_dict(tempdir):
     assert result.equals(table)
 
 
+@pytest.mark.numpy
 @pytest.mark.parquet
 def test_write_dataset_parquet(tempdir):
     table = pa.table([
@@ -4712,8 +4717,8 @@ def test_write_dataset_parquet(tempdir):
 
 def test_write_dataset_csv(tempdir):
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a'] * 10 + ['b'] * 10)
     ], names=["f1", "f2", "chr1"])
 
     base_dir = tempdir / 'csv_dataset'
@@ -4739,8 +4744,8 @@ def test_write_dataset_csv(tempdir):
 @pytest.mark.parquet
 def test_write_dataset_parquet_file_visitor(tempdir):
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a'] * 10 + ['b'] * 10)
     ], names=["f1", "f2", "part"])
 
     visitor_called = False
@@ -4763,7 +4768,7 @@ def test_partition_dataset_parquet_file_visitor(tempdir):
     f1_vals = [item for chunk in range(4) for item in [chunk] * 10]
     f2_vals = [item*10 for chunk in range(4) for item in [chunk] * 10]
     table = pa.table({'f1': f1_vals, 'f2': f2_vals,
-                      'part': np.repeat(['a', 'b'], 20)})
+                      'part': ['a'] * 20 + ['b'] * 20})
 
     root_path = tempdir / 'partitioned'
     partitioning = ds.partitioning(
@@ -4841,8 +4846,8 @@ def test_write_dataset_s3(s3_example_simple):
     )
 
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))],
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a'] * 10 + ['b'] * 10)],
         names=["f1", "f2", "part"]
     )
     part = ds.partitioning(pa.schema([("part", pa.string())]), flavor="hive")
@@ -4918,8 +4923,8 @@ def test_write_dataset_s3_put_only(s3_server):
     _configure_s3_limited_user(s3_server, _minio_put_only_policy)
 
     table = pa.table([
-        pa.array(range(20)), pa.array(np.random.randn(20)),
-        pa.array(np.repeat(['a', 'b'], 10))],
+        pa.array(range(20)), pa.array(random.random() for _ in range(20)),
+        pa.array(['a']*10 + ['b'] * 10)],
         names=["f1", "f2", "part"]
     )
     part = ds.partitioning(pa.schema([("part", pa.string())]), flavor="hive")
diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py
index 0d8b4a152ab9f..eb79121b1cdbe 100644
--- a/python/pyarrow/tests/test_dataset_encryption.py
+++ b/python/pyarrow/tests/test_dataset_encryption.py
@@ -17,7 +17,7 @@
 
 import base64
 from datetime import timedelta
-import numpy as np
+import random
 import pyarrow.fs as fs
 import pyarrow as pa
 
@@ -187,7 +187,10 @@ def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes:
 
     row_count = 2**15 + 1
     table = pa.Table.from_arrays(
-        [pa.array(np.random.rand(row_count), type=pa.float32())], names=["foo"]
+        [pa.array(
+            [random.random() for _ in range(row_count)],
+            type=pa.float32()
+        )], names=["foo"]
     )
 
     kms_config = pe.KmsConnectionConfig()
diff --git a/python/pyarrow/tests/test_dlpack.py b/python/pyarrow/tests/test_dlpack.py
index 7cf3f4acdbd40..a18accb1e21df 100644
--- a/python/pyarrow/tests/test_dlpack.py
+++ b/python/pyarrow/tests/test_dlpack.py
@@ -19,12 +19,20 @@
 from functools import wraps
 import pytest
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 from pyarrow.vendored.version import Version
 
 
+# Marks all of the tests in this module
+# Ignore these with pytest ... -m 'not numpy'
+pytestmark = pytest.mark.numpy
+
+
 def PyCapsule_IsValid(capsule, name):
     return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1
 
@@ -52,45 +60,45 @@ def wrapper(*args, **kwargs):
 
 @check_bytes_allocated
 @pytest.mark.parametrize(
-    ('value_type', 'np_type'),
+    ('value_type', 'np_type_str'),
     [
-        (pa.uint8(), np.uint8),
-        (pa.uint16(), np.uint16),
-        (pa.uint32(), np.uint32),
-        (pa.uint64(), np.uint64),
-        (pa.int8(), np.int8),
-        (pa.int16(), np.int16),
-        (pa.int32(), np.int32),
-        (pa.int64(), np.int64),
-        (pa.float16(), np.float16),
-        (pa.float32(), np.float32),
-        (pa.float64(), np.float64),
+        (pa.uint8(), "uint8"),
+        (pa.uint16(), "uint16"),
+        (pa.uint32(), "uint32"),
+        (pa.uint64(), "uint64"),
+        (pa.int8(), "int8"),
+        (pa.int16(), "int16"),
+        (pa.int32(), "int32"),
+        (pa.int64(), "int64"),
+        (pa.float16(), "float16"),
+        (pa.float32(), "float32"),
+        (pa.float64(), "float64"),
     ]
 )
-def test_dlpack(value_type, np_type):
+def test_dlpack(value_type, np_type_str):
     if Version(np.__version__) < Version("1.24.0"):
         pytest.skip("No dlpack support in numpy versions older than 1.22.0, "
                     "strict keyword in assert_array_equal added in numpy version "
                     "1.24.0")
 
-    expected = np.array([1, 2, 3], dtype=np_type)
+    expected = np.array([1, 2, 3], dtype=np.dtype(np_type_str))
     arr = pa.array(expected, type=value_type)
     check_dlpack_export(arr, expected)
 
     arr_sliced = arr.slice(1, 1)
-    expected = np.array([2], dtype=np_type)
+    expected = np.array([2], dtype=np.dtype(np_type_str))
     check_dlpack_export(arr_sliced, expected)
 
     arr_sliced = arr.slice(0, 1)
-    expected = np.array([1], dtype=np_type)
+    expected = np.array([1], dtype=np.dtype(np_type_str))
     check_dlpack_export(arr_sliced, expected)
 
     arr_sliced = arr.slice(1)
-    expected = np.array([2, 3], dtype=np_type)
+    expected = np.array([2, 3], dtype=np.dtype(np_type_str))
     check_dlpack_export(arr_sliced, expected)
 
     arr_zero = pa.array([], type=value_type)
-    expected = np.array([], dtype=np_type)
+    expected = np.array([], dtype=np.dtype(np_type_str))
     check_dlpack_export(arr_zero, expected)
 
 
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index aacbd2cb6e756..b74eca75bdca9 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -23,12 +23,15 @@
 from uuid import uuid4, UUID
 import sys
 
-import numpy as np
+import pytest
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
 import pyarrow as pa
 from pyarrow.vendored.version import Version
 
-import pytest
-
 
 @contextlib.contextmanager
 def registered_extension_type(ext_type):
@@ -562,6 +565,7 @@ def test_ext_array_pickling(pickle_module):
         assert arr.storage.to_pylist() == [b"foo", b"bar"]
 
 
+@pytest.mark.numpy
 def test_ext_array_conversion_to_numpy():
     storage1 = pa.array([1, 2, 3], type=pa.int64())
     storage2 = pa.array([b"123", b"456", b"789"], type=pa.binary(3))
@@ -619,6 +623,7 @@ def struct_w_ext_data():
     return [sarr1, sarr2]
 
 
+@pytest.mark.numpy
 def test_struct_w_ext_array_to_numpy(struct_w_ext_data):
     # ARROW-15291
     # Check that we don't segfault when trying to build
@@ -1233,6 +1238,7 @@ def test_parquet_extension_nested_in_extension(tmpdir):
             assert table == orig_table
 
 
+@pytest.mark.numpy
 def test_to_numpy():
     period_type = PeriodType('D')
     storage = pa.array([1, 2, 3, 4], pa.int64())
@@ -1285,7 +1291,11 @@ def test_empty_take():
     (["cat", "dog", "horse"], LabelType)
 ))
 @pytest.mark.parametrize(
-    "into", ["to_numpy", pytest.param("to_pandas", marks=pytest.mark.pandas)])
+    "into", [
+        pytest.param("to_numpy", marks=pytest.mark.numpy),
+        pytest.param("to_pandas", marks=pytest.mark.pandas)
+    ]
+)
 def test_extension_array_to_numpy_pandas(data, ty, into):
     storage = pa.array(data)
     ext_arr = pa.ExtensionArray.from_storage(ty(), storage)
@@ -1301,6 +1311,7 @@ def test_extension_array_to_numpy_pandas(data, ty, into):
         assert np.array_equal(result, expected)
 
 
+@pytest.mark.numpy
 def test_array_constructor():
     ext_type = IntegerType()
     storage = pa.array([1, 2, 3], type=pa.int64())
@@ -1333,6 +1344,7 @@ def test_array_constructor_from_pandas():
     assert result.equals(expected)
 
 
+@pytest.mark.numpy
 @pytest.mark.cython
 def test_cpp_extension_in_python(tmpdir):
     from .test_cython import (
@@ -1430,38 +1442,45 @@ def test_tensor_type():
     assert tensor_type.permutation is None
 
 
-@pytest.mark.parametrize("value_type", (np.int8(), np.int64(), np.float32()))
-def test_tensor_class_methods(value_type):
+@pytest.mark.numpy
+@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32"))
+def test_tensor_class_methods(np_type_str):
     from numpy.lib.stride_tricks import as_strided
-    arrow_type = pa.from_numpy_dtype(value_type)
+    arrow_type = pa.from_numpy_dtype(np.dtype(np_type_str))
 
     tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 3])
     storage = pa.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
                        pa.list_(arrow_type, 6))
     arr = pa.ExtensionArray.from_storage(tensor_type, storage)
     expected = np.array(
-        [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=value_type)
+        [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
+        dtype=np.dtype(np_type_str)
+    )
     np.testing.assert_array_equal(arr.to_tensor(), expected)
     np.testing.assert_array_equal(arr.to_numpy_ndarray(), expected)
 
-    expected = np.array([[[7, 8, 9], [10, 11, 12]]], dtype=value_type)
+    expected = np.array([[[7, 8, 9], [10, 11, 12]]], dtype=np.dtype(np_type_str))
     result = arr[1:].to_numpy_ndarray()
     np.testing.assert_array_equal(result, expected)
 
     values = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]
-    flat_arr = np.array(values[0], dtype=value_type)
-    bw = value_type.itemsize
+    flat_arr = np.array(values[0], dtype=np.dtype(np_type_str))
+    bw = np.dtype(np_type_str).itemsize
     storage = pa.array(values, pa.list_(arrow_type, 12))
 
     tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 1, 2])
     result = pa.ExtensionArray.from_storage(tensor_type, storage)
     expected = np.array(
-        [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]], dtype=value_type)
+        [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]],
+        dtype=np.dtype(np_type_str)
+    )
     np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
 
     result = flat_arr.reshape(1, 2, 3, 2)
     expected = np.array(
-        [[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]], dtype=value_type)
+        [[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]],
+        dtype=np.dtype(np_type_str)
+    )
     np.testing.assert_array_equal(result, expected)
 
     tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 2, 1])
@@ -1482,25 +1501,27 @@ def test_tensor_class_methods(value_type):
     assert result.to_tensor().strides == (12 * bw, 1 * bw, 6 * bw, 2 * bw)
 
 
-@pytest.mark.parametrize("value_type", (np.int8(), np.int64(), np.float32()))
-def test_tensor_array_from_numpy(value_type):
+@pytest.mark.numpy
+@pytest.mark.parametrize("np_type_str", ("int8", "int64", "float32"))
+def test_tensor_array_from_numpy(np_type_str):
     from numpy.lib.stride_tricks import as_strided
-    arrow_type = pa.from_numpy_dtype(value_type)
+    arrow_type = pa.from_numpy_dtype(np.dtype(np_type_str))
 
     arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
-                   dtype=value_type, order="C")
+                   dtype=np.dtype(np_type_str), order="C")
     tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
     assert isinstance(tensor_array_from_numpy.type, pa.FixedShapeTensorType)
     assert tensor_array_from_numpy.type.value_type == arrow_type
     assert tensor_array_from_numpy.type.shape == [2, 3]
 
     arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]],
-                   dtype=value_type, order="F")
+                   dtype=np.dtype(np_type_str), order="F")
     with pytest.raises(ValueError, match="First stride needs to be largest"):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
 
-    flat_arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=value_type)
-    bw = value_type.itemsize
+    flat_arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+                        dtype=np.dtype(np_type_str))
+    bw = np.dtype(np_type_str).itemsize
 
     arr = flat_arr.reshape(1, 3, 4)
     tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
@@ -1518,23 +1539,26 @@ def test_tensor_array_from_numpy(value_type):
     arr = flat_arr.reshape(1, 2, 3, 2)
     result = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
     expected = np.array(
-        [[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]], dtype=value_type)
+        [[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]],
+        dtype=np.dtype(np_type_str)
+    )
     np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
 
-    arr = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], dtype=value_type)
+    arr = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
+                   dtype=np.dtype(np_type_str))
     expected = arr[1:]
     result = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)[1:].to_numpy_ndarray()
     np.testing.assert_array_equal(result, expected)
 
-    arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=value_type)
+    arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=np.dtype(np_type_str))
     with pytest.raises(ValueError, match="Cannot convert 1D array or scalar to fixed"):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
 
-    arr = np.array(1, dtype=value_type)
+    arr = np.array(1, dtype=np.dtype(np_type_str))
     with pytest.raises(ValueError, match="Cannot convert 1D array or scalar to fixed"):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
 
-    arr = np.array([], dtype=value_type)
+    arr = np.array([], dtype=np.dtype(np_type_str))
 
     with pytest.raises(ValueError, match="Cannot convert 1D array or scalar to fixed"):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr.reshape((0)))
@@ -1546,6 +1570,7 @@ def test_tensor_array_from_numpy(value_type):
         pa.FixedShapeTensorArray.from_numpy_ndarray(arr.reshape((3, 0, 2)))
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize("tensor_type", (
     pa.fixed_shape_tensor(pa.int8(), [2, 2, 3]),
     pa.fixed_shape_tensor(pa.int8(), [2, 2, 3], permutation=[0, 2, 1]),
@@ -1801,6 +1826,7 @@ def test_bool8_to_bool_conversion():
     assert bool_arr.cast(pa.bool8()) == canonical_bool8_arr
 
 
+@pytest.mark.numpy
 def test_bool8_to_numpy_conversion():
     arr = pa.ExtensionArray.from_storage(
         pa.bool8(),
@@ -1841,6 +1867,7 @@ def test_bool8_to_numpy_conversion():
     assert arr_to_np_writable.ctypes.data != arr_no_nulls.buffers()[1].address
 
 
+@pytest.mark.numpy
 def test_bool8_from_numpy_conversion():
     np_arr_no_nulls = np.array([True, False, True, True], dtype=np.bool_)
     canonical_bool8_arr_no_nulls = pa.ExtensionArray.from_storage(
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 0064006489088..18c8cd5b654e6 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -23,7 +23,10 @@
 import hypothesis as h
 import hypothesis.strategies as st
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 import pyarrow.tests.strategies as past
@@ -135,6 +138,7 @@ def f():
     pytest.raises(exc, f)
 
 
+@pytest.mark.numpy
 def test_dataset(version):
     num_values = (100, 100)
     num_files = 5
@@ -354,6 +358,7 @@ def test_buffer_bounds_error(version):
         _check_arrow_roundtrip(table)
 
 
+@pytest.mark.numpy
 def test_boolean_object_nulls(version):
     repeats = 100
     table = pa.Table.from_arrays(
@@ -540,6 +545,7 @@ def test_read_columns(version):
                             columns=['boo', 'woo'])
 
 
+@pytest.mark.numpy
 def test_overwritten_file(version):
     path = random_path()
     TEST_FILES.append(path)
@@ -675,6 +681,7 @@ def test_v2_compression_options():
         write_feather(df, buf, compression='snappy')
 
 
+@pytest.mark.numpy
 def test_v2_lz4_default_compression():
     # ARROW-8750: Make sure that the compression=None option selects lz4 if
     # it's available
@@ -807,6 +814,7 @@ def test_nested_types(compression):
     _check_arrow_roundtrip(table, compression=compression)
 
 
+@pytest.mark.numpy
 @h.given(past.all_tables, st.sampled_from(["uncompressed", "lz4", "zstd"]))
 def test_roundtrip(table, compression):
     _check_arrow_roundtrip(table, compression=compression)
diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py
index 832c6a2dbdf9f..029a2695b9fd8 100644
--- a/python/pyarrow/tests/test_flight.py
+++ b/python/pyarrow/tests/test_flight.py
@@ -28,7 +28,10 @@
 import traceback
 import json
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 import pyarrow as pa
 
@@ -1588,6 +1591,7 @@ def test_flight_do_put_metadata():
                 assert idx == server_idx
 
 
+@pytest.mark.numpy
 def test_flight_do_put_limit():
     """Try a simple do_put call with a size limit."""
     large_batch = pa.RecordBatch.from_arrays([
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index ef499a3a8d76c..e2df1b1c46835 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -25,11 +25,15 @@
 import os
 import pathlib
 import pytest
+import random
 import sys
 import tempfile
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 from pyarrow.util import guid
 from pyarrow import Codec
@@ -464,6 +468,7 @@ def test_buffer_hex(val, expected_hex_buffer):
     assert buf.hex() == expected_hex_buffer
 
 
+@pytest.mark.numpy
 def test_buffer_to_numpy():
     # Make sure creating a numpy array from an arrow buffer works
     byte_array = bytearray(20)
@@ -476,6 +481,7 @@ def test_buffer_to_numpy():
     assert array.base == buf
 
 
+@pytest.mark.numpy
 def test_buffer_from_numpy():
     # C-contiguous
     arr = np.arange(12, dtype=np.int8).reshape((3, 4))
@@ -493,6 +499,7 @@ def test_buffer_from_numpy():
         buf = pa.py_buffer(arr.T[::2])
 
 
+@pytest.mark.numpy
 def test_buffer_address():
     b1 = b'some data!'
     b2 = bytearray(b1)
@@ -513,6 +520,7 @@ def test_buffer_address():
     assert buf.address == arr.ctypes.data
 
 
+@pytest.mark.numpy
 def test_buffer_equals():
     # Buffer.equals() returns true iff the buffers have the same contents
     def eq(a, b):
@@ -624,6 +632,7 @@ def test_buffer_hashing():
         hash(pa.py_buffer(b'123'))
 
 
+@pytest.mark.numpy
 def test_buffer_protocol_respects_immutability():
     # ARROW-3228; NumPy's frombuffer ctor determines whether a buffer-like
     # object is mutable by first attempting to get a mutable buffer using
@@ -635,6 +644,7 @@ def test_buffer_protocol_respects_immutability():
     assert not numpy_ref.flags.writeable
 
 
+@pytest.mark.numpy
 def test_foreign_buffer():
     obj = np.array([1, 2], dtype=np.int32)
     addr = obj.__array_interface__["data"][0]
@@ -669,6 +679,7 @@ def test_allocate_buffer_resizable():
     assert buf.size == 200
 
 
+@pytest.mark.numpy
 def test_non_cpu_buffer(pickle_module):
     cuda = pytest.importorskip("pyarrow.cuda")
     ctx = cuda.Context(0)
@@ -798,6 +809,7 @@ def test_cache_options_pickling(pickle_module):
         assert pickle_module.loads(pickle_module.dumps(option)) == option
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize("compression", [
     pytest.param(
         "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
@@ -838,6 +850,7 @@ def test_compress_decompress(compression):
         pa.decompress(compressed_bytes, codec=compression)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize("compression", [
     pytest.param(
         "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
@@ -996,6 +1009,7 @@ def make_buffer(bytes_obj):
     assert refcount_before == sys.getrefcount(val)
 
 
+@pytest.mark.numpy
 def test_nativefile_write_memoryview():
     f = pa.BufferOutputStream()
     data = b'ok'
@@ -1058,8 +1072,8 @@ def test_mock_output_stream():
 @pytest.fixture
 def sample_disk_data(request, tmpdir):
     SIZE = 4096
-    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
-    data = arr.tobytes()[:SIZE]
+    arr = [random.randint(0, 255) for _ in range(SIZE)]
+    data = bytes(arr[:SIZE])
 
     path = os.path.join(str(tmpdir), guid())
 
@@ -1146,8 +1160,8 @@ def test_memory_map_writer(tmpdir):
     if sys.platform == "emscripten":
         pytest.xfail("Multiple memory maps to same file don't work on emscripten")
     SIZE = 4096
-    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
-    data = arr.tobytes()[:SIZE]
+    arr = [random.randint(0, 255) for _ in range(SIZE)]
+    data = bytes(arr[:SIZE])
 
     path = os.path.join(str(tmpdir), guid())
     with open(path, 'wb') as f:
@@ -1187,9 +1201,9 @@ def test_memory_map_writer(tmpdir):
 
 def test_memory_map_resize(tmpdir):
     SIZE = 4096
-    arr = np.random.randint(0, 256, size=SIZE).astype(np.uint8)
-    data1 = arr.tobytes()[:(SIZE // 2)]
-    data2 = arr.tobytes()[(SIZE // 2):]
+    arr = [random.randint(0, 255) for _ in range(SIZE)]
+    data1 = bytes(arr[:(SIZE // 2)])
+    data2 = bytes(arr[(SIZE // 2):])
 
     path = os.path.join(str(tmpdir), guid())
 
@@ -1202,7 +1216,7 @@ def test_memory_map_resize(tmpdir):
     mmap.close()
 
     with open(path, 'rb') as f:
-        assert f.read() == arr.tobytes()
+        assert f.read() == bytes(arr[:SIZE])
 
 
 def test_memory_zero_length(tmpdir):
@@ -1241,8 +1255,8 @@ def test_memory_map_deref_remove(tmpdir):
 
 def test_os_file_writer(tmpdir):
     SIZE = 4096
-    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
-    data = arr.tobytes()[:SIZE]
+    arr = [random.randint(0, 255) for _ in range(SIZE)]
+    data = bytes(arr[:SIZE])
 
     path = os.path.join(str(tmpdir), guid())
     with open(path, 'wb') as f:
@@ -1523,6 +1537,7 @@ def test_buffered_input_stream_detach_non_seekable():
         raw.seek(2)
 
 
+@pytest.mark.numpy
 def test_buffered_output_stream():
     np_buf = np.zeros(100, dtype=np.int8)  # zero-initialized buffer
     buf = pa.py_buffer(np_buf)
@@ -1540,6 +1555,7 @@ def test_buffered_output_stream():
     assert np_buf[:10].tobytes() == b'123456789\0'
 
 
+@pytest.mark.numpy
 def test_buffered_output_stream_detach():
     np_buf = np.zeros(100, dtype=np.int8)  # zero-initialized buffer
     buf = pa.py_buffer(np_buf)
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 1e5242efe40f0..4be5792a92f6d 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -20,11 +20,15 @@
 import io
 import pathlib
 import pytest
+import random
 import socket
 import threading
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 from pyarrow.tests.util import changed_environ, invoke_script
@@ -59,7 +63,7 @@ def write_batches(self, num_batches=5, as_table=False):
         batches = []
         for i in range(num_batches):
             batch = pa.record_batch(
-                [np.random.randn(nrows),
+                [[random.random() for _ in range(nrows)],
                  ['foo', None, 'bar', 'bazbaz', 'qux']],
                 schema=schema)
             batches.append(batch)
@@ -422,7 +426,7 @@ def test_stream_simple_roundtrip(stream_fixture, use_legacy_ipc_format):
 @pytest.mark.zstd
 def test_compression_roundtrip():
     sink = io.BytesIO()
-    values = np.random.randint(0, 3, 10000)
+    values = [random.randint(0, 3) for _ in range(10000)]
     table = pa.Table.from_arrays([values], names=["values"])
 
     options = pa.ipc.IpcWriteOptions(compression='zstd')
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
index a0a6174266310..3bb4440e89750 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -23,7 +23,10 @@
 import string
 import unittest
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
@@ -297,6 +300,7 @@ def test_explicit_schema_with_unexpected_behaviour(self):
                            match="JSON parse error: unexpected field"):
             self.read_bytes(rows, parse_options=opts)
 
+    @pytest.mark.numpy
     def test_small_random_json(self):
         data, expected = make_random_json(num_cols=2, num_rows=10)
         table = self.read_bytes(data)
@@ -304,6 +308,7 @@ def test_small_random_json(self):
         assert table.equals(expected)
         assert table.to_pydict() == expected.to_pydict()
 
+    @pytest.mark.numpy
     def test_load_large_json(self):
         data, expected = make_random_json(num_cols=2, num_rows=100100)
         # set block size is 10MB
@@ -312,6 +317,7 @@ def test_load_large_json(self):
         assert table.num_rows == 100100
         assert expected.num_rows == 100100
 
+    @pytest.mark.numpy
     def test_stress_block_sizes(self):
         # Test a number of small block sizes to stress block stitching
         data_base, expected = make_random_json(num_cols=2, num_rows=100)
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 208812c3ac458..178a073ed59dc 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -27,9 +27,18 @@
 
 import hypothesis as h
 import hypothesis.strategies as st
-import numpy as np
-import numpy.testing as npt
 import pytest
+try:
+    import numpy as np
+    import numpy.testing as npt
+    try:
+        _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning
+    except AttributeError:
+        from numpy.exceptions import (
+            VisibleDeprecationWarning as _np_VisibleDeprecationWarning
+        )
+except ImportError:
+    np = None
 
 from pyarrow.pandas_compat import get_logical_type, _pandas_api
 from pyarrow.tests.util import invoke_script, random_ascii, rands
@@ -51,14 +60,6 @@
     pass
 
 
-try:
-    _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning
-except AttributeError:
-    from numpy.exceptions import (
-        VisibleDeprecationWarning as _np_VisibleDeprecationWarning
-    )
-
-
 # Marks all of the tests in this module
 pytestmark = pytest.mark.pandas
 
@@ -1202,9 +1203,11 @@ def test_datetime64_to_date32(self):
 
     @pytest.mark.parametrize('mask', [
         None,
-        np.array([True, False, False, True, False, False]),
+        [True, False, False, True, False, False],
     ])
     def test_pandas_datetime_to_date64(self, mask):
+        if mask:
+            mask = np.array(mask)
         s = pd.to_datetime([
             '2018-05-10T00:00:00',
             '2018-05-11T00:00:00',
@@ -1608,7 +1611,8 @@ def test_array_from_pandas_date_with_mask(self):
         assert pa.Array.from_pandas(expected).equals(result)
 
     @pytest.mark.skipif(
-        Version('1.16.0') <= Version(np.__version__) < Version('1.16.1'),
+        np is not None and Version('1.16.0') <= Version(
+            np.__version__) < Version('1.16.1'),
         reason='Until numpy/numpy#12745 is resolved')
     def test_fixed_offset_timezone(self):
         df = pd.DataFrame({
@@ -2921,23 +2925,23 @@ class TestConvertMisc:
     """
 
     type_pairs = [
-        (np.int8, pa.int8()),
-        (np.int16, pa.int16()),
-        (np.int32, pa.int32()),
-        (np.int64, pa.int64()),
-        (np.uint8, pa.uint8()),
-        (np.uint16, pa.uint16()),
-        (np.uint32, pa.uint32()),
-        (np.uint64, pa.uint64()),
-        (np.float16, pa.float16()),
-        (np.float32, pa.float32()),
-        (np.float64, pa.float64()),
+        ("int8", pa.int8()),
+        ("int16", pa.int16()),
+        ("int32", pa.int32()),
+        ("int64", pa.int64()),
+        ("uint8", pa.uint8()),
+        ("uint16", pa.uint16()),
+        ("uint32", pa.uint32()),
+        ("uint64", pa.uint64()),
+        ("float16", pa.float16()),
+        ("float32", pa.float32()),
+        ("float64", pa.float64()),
         # XXX unsupported
         # (np.dtype([('a', 'i2')]), pa.struct([pa.field('a', pa.int16())])),
-        (np.object_, pa.string()),
-        (np.object_, pa.binary()),
-        (np.object_, pa.binary(10)),
-        (np.object_, pa.list_(pa.int64())),
+        ("object", pa.string()),
+        ("object", pa.binary()),
+        ("object", pa.binary(10)),
+        ("object", pa.list_(pa.int64())),
     ]
 
     def test_all_none_objects(self):
@@ -2950,8 +2954,8 @@ def test_all_none_category(self):
         _check_pandas_roundtrip(df)
 
     def test_empty_arrays(self):
-        for dtype, pa_type in self.type_pairs:
-            arr = np.array([], dtype=dtype)
+        for dtype_str, pa_type in self.type_pairs:
+            arr = np.array([], dtype=np.dtype(dtype_str))
             _check_array_roundtrip(arr, type=pa_type)
 
     def test_non_threaded_conversion(self):
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index bc50697e1be17..3f4a53c473e7e 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -20,7 +20,10 @@
 import pytest
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -40,7 +43,6 @@
     (1, pa.int64(), pa.Int64Scalar),
     (1, pa.uint64(), pa.UInt64Scalar),
     (1.0, None, pa.DoubleScalar),
-    (np.float16(1.0), pa.float16(), pa.HalfFloatScalar),
     (1.0, pa.float32(), pa.FloatScalar),
     (decimal.Decimal("1.123"), None, pa.Decimal128Scalar),
     (decimal.Decimal("1.1234567890123456789012345678901234567890"),
@@ -98,6 +100,40 @@ def test_basics(value, ty, klass, pickle_module):
     assert wr() is None
 
 
+# This test is a copy of test_basics but only for float16 (HalfFloatScalar)
+# which currently requires a numpy scalar to create it. Test collection
+# would fail if numpy were used in the parametrization when not installed.
+@pytest.mark.numpy
+def test_basics_np_required(pickle_module):
+    value, ty, klass = np.float16(1.0), pa.float16(), pa.HalfFloatScalar
+    s = pa.scalar(value, type=ty)
+    s.validate()
+    s.validate(full=True)
+    assert isinstance(s, klass)
+    assert s.as_py() == value
+    assert s == pa.scalar(value, type=ty)
+    assert s != value
+    assert s != "else"
+    assert hash(s) == hash(s)
+    assert s.is_valid is True
+    assert s != None  # noqa: E711
+
+    s = pa.scalar(None, type=s.type)
+    assert s.is_valid is False
+    assert s.as_py() is None
+    assert s != pa.scalar(value, type=ty)
+
+    # test pickle roundtrip
+    restored = pickle_module.loads(pickle_module.dumps(s))
+    assert s.equals(restored)
+
+    # test that scalars are weak-referenceable
+    wr = weakref.ref(s)
+    assert wr() is not None
+    del s
+    assert wr() is None
+
+
 def test_invalid_scalar():
     s = pc.cast(pa.scalar(b"\xff"), pa.string(), safe=False)
     s.validate()
@@ -202,14 +238,15 @@ def test_numerics():
     assert str(s) == "1.5"
     assert s.as_py() == 1.5
 
-    # float16
-    s = pa.scalar(np.float16(0.5), type='float16')
-    assert isinstance(s, pa.HalfFloatScalar)
-    # on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)"
-    # on numpy1 repr(np.float16(0.5)) == "0.5"
-    assert repr(s) == f"<pyarrow.HalfFloatScalar: {np.float16(0.5)!r}>"
-    assert str(s) == "0.5"
-    assert s.as_py() == 0.5
+    if np is not None:
+        # float16
+        s = pa.scalar(np.float16(0.5), type='float16')
+        assert isinstance(s, pa.HalfFloatScalar)
+        # on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)"
+        # on numpy1 repr(np.float16(0.5)) == "0.5"
+        assert repr(s) == f"<pyarrow.HalfFloatScalar: {np.float16(0.5)!r}>"
+        assert str(s) == "0.5"
+        assert s.as_py() == 0.5
 
 
 def test_decimal128():
@@ -434,6 +471,7 @@ def test_timestamp_fixed_offset_print():
     assert str(arr[0]) == "1970-01-01 02:00:00+02:00"
 
 
+@pytest.mark.numpy
 def test_duration():
     arr = np.array([0, 3600000000000], dtype='timedelta64[ns]')
 
@@ -559,6 +597,7 @@ def test_list(ty, klass):
         s[2]
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('ty', [
     pa.list_(pa.int64()),
     pa.large_list(pa.int64()),
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 1b05c58384cf0..bdcb6c2b42d78 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -20,7 +20,10 @@
 import weakref
 
 import pytest
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow as pa
 
 import pyarrow.tests.util as test_util
@@ -185,6 +188,7 @@ def test_time_types():
         pa.time64('s')
 
 
+@pytest.mark.numpy
 def test_from_numpy_dtype():
     cases = [
         (np.dtype('bool'), pa.bool_()),
diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py
index aa7da0a742086..7ba9e2b3e13db 100644
--- a/python/pyarrow/tests/test_sparse_tensor.py
+++ b/python/pyarrow/tests/test_sparse_tensor.py
@@ -19,7 +19,10 @@
 import sys
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytestmark = pytest.mark.numpy
 import pyarrow as pa
 
 try:
diff --git a/python/pyarrow/tests/test_strategies.py b/python/pyarrow/tests/test_strategies.py
index 14fc949928c33..da50bcda52f2b 100644
--- a/python/pyarrow/tests/test_strategies.py
+++ b/python/pyarrow/tests/test_strategies.py
@@ -17,6 +17,8 @@
 
 import hypothesis as h
 
+import pytest
+
 import pyarrow as pa
 import pyarrow.tests.strategies as past
 
@@ -36,11 +38,13 @@ def test_schemas(schema):
     assert isinstance(schema, pa.lib.Schema)
 
 
+@pytest.mark.numpy
 @h.given(past.all_arrays)
 def test_arrays(array):
     assert isinstance(array, pa.lib.Array)
 
 
+@pytest.mark.numpy
 @h.given(past.arrays(past.primitive_types, nullable=False))
 def test_array_nullability(array):
     assert array.null_count == 0
@@ -56,6 +60,7 @@ def test_record_batches(record_bath):
     assert isinstance(record_bath, pa.lib.RecordBatch)
 
 
+@pytest.mark.numpy
 @h.given(past.all_tables)
 def test_tables(table):
     assert isinstance(table, pa.lib.Table)
diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py
index 40700e4741321..01d468cd9e9cc 100644
--- a/python/pyarrow/tests/test_substrait.py
+++ b/python/pyarrow/tests/test_substrait.py
@@ -608,6 +608,7 @@ def table_provider(names, schema):
     assert res_tb == expected
 
 
+@pytest.mark.numpy
 def test_scalar_aggregate_udf_basic(varargs_agg_func_fixture):
 
     test_table = pa.Table.from_pydict(
@@ -756,6 +757,7 @@ def table_provider(names, _):
     assert res_tb == expected_tb
 
 
+@pytest.mark.numpy
 def test_hash_aggregate_udf_basic(varargs_agg_func_fixture):
 
     test_table = pa.Table.from_pydict(
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index cd38909edf357..3b60cff2d8cf2 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -20,7 +20,10 @@
 import sys
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -125,6 +128,7 @@ def test_chunked_array_can_combine_chunks_with_no_chunks():
     ).combine_chunks() == pa.array([], type=pa.bool_())
 
 
+@pytest.mark.numpy
 def test_chunked_array_to_numpy():
     data = pa.chunked_array([
         [1, 2, 3],
@@ -173,6 +177,7 @@ def test_chunked_array_str():
 ]"""
 
 
+@pytest.mark.numpy
 def test_chunked_array_getitem():
     data = [
         pa.array([1, 2, 3]),
@@ -972,12 +977,14 @@ def check_tensors(tensor, expected_tensor, type, size):
     assert tensor.strides == expected_tensor.strides
 
 
-@pytest.mark.parametrize('typ', [
-    np.uint8, np.uint16, np.uint32, np.uint64,
-    np.int8, np.int16, np.int32, np.int64,
-    np.float32, np.float64,
+@pytest.mark.numpy
+@pytest.mark.parametrize('typ_str', [
+    "uint8", "uint16", "uint32", "uint64",
+    "int8", "int16", "int32", "int64",
+    "float32", "float64",
 ])
-def test_recordbatch_to_tensor_uniform_type(typ):
+def test_recordbatch_to_tensor_uniform_type(typ_str):
+    typ = np.dtype(typ_str)
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
     arr3 = [100, 100, 100, 100, 100, 100, 100, 100, 100]
@@ -1031,6 +1038,7 @@ def test_recordbatch_to_tensor_uniform_type(typ):
     check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
 
 
+@pytest.mark.numpy
 def test_recordbatch_to_tensor_uniform_float_16():
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
@@ -1054,6 +1062,7 @@ def test_recordbatch_to_tensor_uniform_float_16():
     check_tensors(result, expected, pa.float16(), 27)
 
 
+@pytest.mark.numpy
 def test_recordbatch_to_tensor_mixed_type():
     # uint16 + int16 = int32
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
@@ -1105,6 +1114,7 @@ def test_recordbatch_to_tensor_mixed_type():
     assert result.strides == expected.strides
 
 
+@pytest.mark.numpy
 def test_recordbatch_to_tensor_unsupported_mixed_type_with_float16():
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
@@ -1124,6 +1134,7 @@ def test_recordbatch_to_tensor_unsupported_mixed_type_with_float16():
         batch.to_tensor()
 
 
+@pytest.mark.numpy
 def test_recordbatch_to_tensor_nan():
     arr1 = [1, 2, 3, 4, np.nan, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, np.nan, 90]
@@ -1144,6 +1155,7 @@ def test_recordbatch_to_tensor_nan():
     assert result.strides == expected.strides
 
 
+@pytest.mark.numpy
 def test_recordbatch_to_tensor_null():
     arr1 = [1, 2, 3, 4, None, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, None, 90]
@@ -1204,6 +1216,7 @@ def test_recordbatch_to_tensor_null():
     assert result.strides == expected.strides
 
 
+@pytest.mark.numpy
 def test_recordbatch_to_tensor_empty():
     batch = pa.RecordBatch.from_arrays(
         [
@@ -1295,6 +1308,7 @@ def test_slice_zero_length_table():
     table.to_pandas()
 
 
+@pytest.mark.numpy
 def test_recordbatchlist_schema_equals():
     a1 = np.array([1], dtype='uint32')
     a2 = np.array([4.0, 5.0], dtype='float64')
@@ -2130,6 +2144,7 @@ def test_table_unsafe_casting(cls):
     assert casted_table.equals(expected_table)
 
 
+@pytest.mark.numpy
 def test_invalid_table_construct():
     array = np.array([0, 1], dtype=np.uint8)
     u8 = pa.uint8()
@@ -3287,6 +3302,7 @@ def test_table_sort_by(cls):
     assert sorted_tab_dict["b"] == ["foo", "car", "bar", "foobar"]
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize("constructor", [pa.table, pa.record_batch])
 def test_numpy_asarray(constructor):
     table = constructor([[1, 2, 3], [4.0, 5.0, 6.0]], names=["a", "b"])
@@ -3319,6 +3335,7 @@ def test_numpy_asarray(constructor):
     assert result.dtype == "int32"
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize("constructor", [pa.table, pa.record_batch])
 def test_numpy_array_protocol(constructor):
     table = constructor([[1, 2, 3], [4.0, 5.0, 6.0]], names=["a", "b"])
diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py
index 29c6de65b1607..debb1066280c1 100644
--- a/python/pyarrow/tests/test_tensor.py
+++ b/python/pyarrow/tests/test_tensor.py
@@ -21,7 +21,10 @@
 import warnings
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytestmark = pytest.mark.numpy
 import pyarrow as pa
 
 
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index d673f956527aa..cc680939ac46a 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -30,7 +30,10 @@
     tzst = None
 import weakref
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow as pa
 import pyarrow.types as types
 import pyarrow.tests.strategies as past
@@ -1265,14 +1268,16 @@ def test_field_modified_copies():
 
 def test_is_integer_value():
     assert pa.types.is_integer_value(1)
-    assert pa.types.is_integer_value(np.int64(1))
+    if np is not None:
+        assert pa.types.is_integer_value(np.int64(1))
     assert not pa.types.is_integer_value('1')
 
 
 def test_is_float_value():
     assert not pa.types.is_float_value(1)
     assert pa.types.is_float_value(1.)
-    assert pa.types.is_float_value(np.float64(1))
+    if np is not None:
+        assert pa.types.is_float_value(np.float64(1))
     assert not pa.types.is_float_value('1.0')
 
 
@@ -1280,8 +1285,9 @@ def test_is_boolean_value():
     assert not pa.types.is_boolean_value(1)
     assert pa.types.is_boolean_value(True)
     assert pa.types.is_boolean_value(False)
-    assert pa.types.is_boolean_value(np.bool_(True))
-    assert pa.types.is_boolean_value(np.bool_(False))
+    if np is not None:
+        assert pa.types.is_boolean_value(np.bool_(True))
+        assert pa.types.is_boolean_value(np.bool_(False))
 
 
 @h.settings(suppress_health_check=(h.HealthCheck.too_slow,))
diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py
index 22fefbbb58ba9..93004a30618a7 100644
--- a/python/pyarrow/tests/test_udf.py
+++ b/python/pyarrow/tests/test_udf.py
@@ -18,7 +18,10 @@
 
 import pytest
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 from pyarrow import compute as pc
@@ -749,6 +752,7 @@ def test_udt_datasource1_exception():
         _test_datasource1_udt(datasource1_exception)
 
 
+@pytest.mark.numpy
 def test_scalar_agg_basic(unary_agg_func_fixture):
     arr = pa.array([10.0, 20.0, 30.0, 40.0, 50.0], pa.float64())
     result = pc.call_function("mean_udf", [arr])
@@ -756,6 +760,7 @@ def test_scalar_agg_basic(unary_agg_func_fixture):
     assert result == expected
 
 
+@pytest.mark.numpy
 def test_scalar_agg_empty(unary_agg_func_fixture):
     empty = pa.array([], pa.float64())
 
@@ -775,6 +780,7 @@ def test_scalar_agg_wrong_output_type(wrong_output_type_agg_func_fixture):
         pc.call_function("y=wrong_output_type(x)", [arr])
 
 
+@pytest.mark.numpy
 def test_scalar_agg_varargs(varargs_agg_func_fixture):
     arr1 = pa.array([10, 20, 30, 40, 50], pa.int64())
     arr2 = pa.array([1.0, 2.0, 3.0, 4.0, 5.0], pa.float64())
@@ -786,6 +792,7 @@ def test_scalar_agg_varargs(varargs_agg_func_fixture):
     assert result == expected
 
 
+@pytest.mark.numpy
 def test_scalar_agg_exception(exception_agg_func_fixture):
     arr = pa.array([10, 20, 30, 40, 50, 60], pa.int64())
 
@@ -793,6 +800,7 @@ def test_scalar_agg_exception(exception_agg_func_fixture):
         pc.call_function("y=exception_len(x)", [arr])
 
 
+@pytest.mark.numpy
 def test_hash_agg_basic(unary_agg_func_fixture):
     arr1 = pa.array([10.0, 20.0, 30.0, 40.0, 50.0], pa.float64())
     arr2 = pa.array([4, 2, 1, 2, 1], pa.int32())
@@ -811,6 +819,7 @@ def test_hash_agg_basic(unary_agg_func_fixture):
     assert result.sort_by('id') == expected.sort_by('id')
 
 
+@pytest.mark.numpy
 def test_hash_agg_empty(unary_agg_func_fixture):
     arr1 = pa.array([], pa.float64())
     arr2 = pa.array([], pa.int32())
@@ -841,6 +850,7 @@ def test_hash_agg_wrong_output_type(wrong_output_type_agg_func_fixture):
         table.group_by("id").aggregate([("value", "y=wrong_output_type(x)")])
 
 
+@pytest.mark.numpy
 def test_hash_agg_exception(exception_agg_func_fixture):
     arr1 = pa.array([10, 20, 30, 40, 50], pa.int64())
     arr2 = pa.array([4, 2, 1, 2, 1], pa.int32())
@@ -850,6 +860,7 @@ def test_hash_agg_exception(exception_agg_func_fixture):
         table.group_by("id").aggregate([("value", "y=exception_len(x)")])
 
 
+@pytest.mark.numpy
 def test_hash_agg_random(sum_agg_func_fixture):
     """Test hash aggregate udf with randomly sampled data"""
 
diff --git a/python/pyarrow/tests/test_without_numpy.py b/python/pyarrow/tests/test_without_numpy.py
new file mode 100644
index 0000000000000..55c12602ce89a
--- /dev/null
+++ b/python/pyarrow/tests/test_without_numpy.py
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+import pyarrow as pa
+
+# Marks all of the tests in this module
+# Ignore these with pytest ... -m 'not nonumpy'
+pytestmark = pytest.mark.nonumpy
+
+
+def test_array_to_np():
+    arr = pa.array(range(10))
+
+    msg = "Cannot return a numpy.ndarray if NumPy is not present"
+
+    with pytest.raises(ImportError, match=msg):
+        arr.to_numpy()
+
+
+def test_chunked_array_to_np():
+    data = pa.chunked_array([
+        [1, 2, 3],
+        [4, 5, 6],
+        []
+    ])
+    msg = "Cannot return a numpy.ndarray if NumPy is not present"
+
+    with pytest.raises(ImportError, match=msg):
+        data.to_numpy()
+
+
+def test_tensor_to_np():
+    tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2])
+    arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]]
+    storage = pa.array(arr, pa.list_(pa.int32(), 4))
+    tensor_array = pa.ExtensionArray.from_storage(tensor_type, storage)
+
+    tensor = tensor_array.to_tensor()
+    msg = "Cannot return a numpy.ndarray if NumPy is not present"
+
+    with pytest.raises(ImportError, match=msg):
+        tensor.to_numpy()
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index 638eee9807335..aa6dd21f800c5 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -22,7 +22,6 @@
 import contextlib
 import decimal
 import gc
-import numpy as np
 import os
 import random
 import re
@@ -110,27 +109,15 @@ def randdecimal(precision, scale):
 
 
 def random_ascii(length):
-    return bytes(np.random.randint(65, 123, size=length, dtype='i1'))
+    return bytes([random.randint(65, 122) for i in range(length)])
 
 
 def rands(nchars):
     """
     Generate one random string.
     """
-    RANDS_CHARS = np.array(
-        list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
-    return "".join(np.random.choice(RANDS_CHARS, nchars))
-
-
-def make_dataframe():
-    import pandas as pd
-
-    N = 30
-    df = pd.DataFrame(
-        {col: np.random.randn(N) for col in string.ascii_uppercase[:4]},
-        index=pd.Index([rands(10) for _ in range(N)])
-    )
-    return df
+    RANDS_CHARS = list(string.ascii_letters + string.digits)
+    return "".join(random.choice(RANDS_CHARS) for i in range(nchars))
 
 
 def memory_leak_check(f, metric='rss', threshold=1 << 17, iterations=10,
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index f83ecc3aa4326..a46caff1f21a4 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -33,42 +33,50 @@ from cython import sizeof
 
 # These are imprecise because the type (in pandas 0.x) depends on the presence
 # of nulls
-cdef dict _pandas_type_map = {
-    _Type_NA: np.object_,  # NaNs
-    _Type_BOOL: np.bool_,
-    _Type_INT8: np.int8,
-    _Type_INT16: np.int16,
-    _Type_INT32: np.int32,
-    _Type_INT64: np.int64,
-    _Type_UINT8: np.uint8,
-    _Type_UINT16: np.uint16,
-    _Type_UINT32: np.uint32,
-    _Type_UINT64: np.uint64,
-    _Type_HALF_FLOAT: np.float16,
-    _Type_FLOAT: np.float32,
-    _Type_DOUBLE: np.float64,
-    # Pandas does not support [D]ay, so default to [ms] for date32
-    _Type_DATE32: np.dtype('datetime64[ms]'),
-    _Type_DATE64: np.dtype('datetime64[ms]'),
-    _Type_TIMESTAMP: {
-        's': np.dtype('datetime64[s]'),
-        'ms': np.dtype('datetime64[ms]'),
-        'us': np.dtype('datetime64[us]'),
-        'ns': np.dtype('datetime64[ns]'),
-    },
-    _Type_DURATION: {
-        's': np.dtype('timedelta64[s]'),
-        'ms': np.dtype('timedelta64[ms]'),
-        'us': np.dtype('timedelta64[us]'),
-        'ns': np.dtype('timedelta64[ns]'),
-    },
-    _Type_BINARY: np.object_,
-    _Type_FIXED_SIZE_BINARY: np.object_,
-    _Type_STRING: np.object_,
-    _Type_LIST: np.object_,
-    _Type_MAP: np.object_,
-    _Type_DECIMAL128: np.object_,
-}
+cdef dict _pandas_type_map = {}
+
+
+def _get_pandas_type_map():
+    global _pandas_type_map
+    if not _pandas_type_map:
+        _pandas_type_map.update({
+            _Type_NA: np.object_,  # NaNs
+            _Type_BOOL: np.bool_,
+            _Type_INT8: np.int8,
+            _Type_INT16: np.int16,
+            _Type_INT32: np.int32,
+            _Type_INT64: np.int64,
+            _Type_UINT8: np.uint8,
+            _Type_UINT16: np.uint16,
+            _Type_UINT32: np.uint32,
+            _Type_UINT64: np.uint64,
+            _Type_HALF_FLOAT: np.float16,
+            _Type_FLOAT: np.float32,
+            _Type_DOUBLE: np.float64,
+            # Pandas does not support [D]ay, so default to [ms] for date32
+            _Type_DATE32: np.dtype('datetime64[ms]'),
+            _Type_DATE64: np.dtype('datetime64[ms]'),
+            _Type_TIMESTAMP: {
+                's': np.dtype('datetime64[s]'),
+                'ms': np.dtype('datetime64[ms]'),
+                'us': np.dtype('datetime64[us]'),
+                'ns': np.dtype('datetime64[ns]'),
+            },
+            _Type_DURATION: {
+                's': np.dtype('timedelta64[s]'),
+                'ms': np.dtype('timedelta64[ms]'),
+                'us': np.dtype('timedelta64[us]'),
+                'ns': np.dtype('timedelta64[ns]'),
+            },
+            _Type_BINARY: np.object_,
+            _Type_FIXED_SIZE_BINARY: np.object_,
+            _Type_STRING: np.object_,
+            _Type_LIST: np.object_,
+            _Type_MAP: np.object_,
+            _Type_DECIMAL128: np.object_,
+        })
+    return _pandas_type_map
+
 
 cdef dict _pep3118_type_map = {
     _Type_INT8: b'b',
@@ -149,14 +157,15 @@ def _is_primitive(Type type):
 
 def _get_pandas_type(arrow_type, coerce_to_ns=False):
     cdef Type type_id = arrow_type.id
-    if type_id not in _pandas_type_map:
+    cdef dict pandas_type_map = _get_pandas_type_map()
+    if type_id not in pandas_type_map:
         return None
     if coerce_to_ns:
         # ARROW-3789: Coerce date/timestamp types to datetime64[ns]
         if type_id == _Type_DURATION:
             return np.dtype('timedelta64[ns]')
         return np.dtype('datetime64[ns]')
-    pandas_type = _pandas_type_map[type_id]
+    pandas_type = pandas_type_map[type_id]
     if isinstance(pandas_type, dict):
         unit = getattr(arrow_type, 'unit', None)
         pandas_type = pandas_type.get(unit, None)

From 44d3f763c083280d2480a735a9ab45243af48232 Mon Sep 17 00:00:00 2001
From: mwish <maplewish117@gmail.com>
Date: Mon, 2 Sep 2024 23:06:10 +0800
Subject: [PATCH 49/63] GH-43758: [C++] Compute: More comment in RowEncoder
 (#43763)

### Rationale for this change

Some comments for RowEncoder

### What changes are included in this PR?

Some comments for RowEncoder

### Are these changes tested?

Covered by existing tests.

### Are there any user-facing changes?

no

* GitHub Issue: #43758

Lead-authored-by: mwish <maplewish117@gmail.com>
Co-authored-by: mwish <1506118561@qq.com>
Co-authored-by: mwish <anmmscs_maple@qq.com>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Co-authored-by: Rossi Sun <zanmato1984@gmail.com>
Signed-off-by: mwish <maplewish117@gmail.com>
---
 cpp/src/arrow/compute/light_array_internal.h  |   6 +-
 .../arrow/compute/row/row_encoder_internal.cc |  56 +++----
 .../arrow/compute/row/row_encoder_internal.h  | 154 ++++++++++++++----
 cpp/src/arrow/compute/row/row_internal.h      |   2 +-
 4 files changed, 155 insertions(+), 63 deletions(-)

diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h
index b8e48f096baeb..5adb06e540009 100644
--- a/cpp/src/arrow/compute/light_array_internal.h
+++ b/cpp/src/arrow/compute/light_array_internal.h
@@ -65,12 +65,12 @@ struct ARROW_EXPORT KeyColumnMetadata {
   /// If this is true the column will have a validity buffer and
   /// a data buffer and the third buffer will be unused.
   bool is_fixed_length;
-  /// \brief True if this column is the null type
+  /// \brief True if this column is the null type (NA).
   bool is_null_type;
   /// \brief The number of bytes for each item
   ///
   /// Zero has a special meaning, indicating a bit vector with one bit per value if it
-  /// isn't a null type column.
+  /// isn't a null type column. Generally, this means that the column is a boolean type.
   ///
   /// For a varying-length binary column this represents the number of bytes per offset.
   uint32_t fixed_length;
@@ -405,7 +405,7 @@ class ARROW_EXPORT ExecBatchBuilder {
 
   int num_rows() const { return values_.empty() ? 0 : values_[0].num_rows(); }
 
-  static int num_rows_max() { return 1 << kLogNumRows; }
+  static constexpr int num_rows_max() { return 1 << kLogNumRows; }
 
  private:
   static constexpr int kLogNumRows = 15;
diff --git a/cpp/src/arrow/compute/row/row_encoder_internal.cc b/cpp/src/arrow/compute/row/row_encoder_internal.cc
index 414cc6793a5a3..0965e4e8f9571 100644
--- a/cpp/src/arrow/compute/row/row_encoder_internal.cc
+++ b/cpp/src/arrow/compute/row/row_encoder_internal.cc
@@ -145,41 +145,37 @@ void FixedWidthKeyEncoder::AddLengthNull(int32_t* length) {
 
 Status FixedWidthKeyEncoder::Encode(const ExecValue& data, int64_t batch_length,
                                     uint8_t** encoded_bytes) {
+  auto handle_next_valid_value = [&](std::string_view bytes) {
+    auto& encoded_ptr = *encoded_bytes++;
+    *encoded_ptr++ = kValidByte;
+    memcpy(encoded_ptr, bytes.data(), byte_width_);
+    encoded_ptr += byte_width_;
+  };
+  auto handle_next_null_value = [&] {
+    auto& encoded_ptr = *encoded_bytes++;
+    *encoded_ptr++ = kNullByte;
+    memset(encoded_ptr, 0, byte_width_);
+    encoded_ptr += byte_width_;
+  };
   if (data.is_array()) {
     ArraySpan viewed = data.array;
+    // The original type might not be FixedSizeBinaryType, but the input
+    // is viewed as fixed-size binary data of byte_width_ bytes here.
     auto view_ty = fixed_size_binary(byte_width_);
     viewed.type = view_ty.get();
-    VisitArraySpanInline<FixedSizeBinaryType>(
-        viewed,
-        [&](std::string_view bytes) {
-          auto& encoded_ptr = *encoded_bytes++;
-          *encoded_ptr++ = kValidByte;
-          memcpy(encoded_ptr, bytes.data(), byte_width_);
-          encoded_ptr += byte_width_;
-        },
-        [&] {
-          auto& encoded_ptr = *encoded_bytes++;
-          *encoded_ptr++ = kNullByte;
-          memset(encoded_ptr, 0, byte_width_);
-          encoded_ptr += byte_width_;
-        });
+    VisitArraySpanInline<FixedSizeBinaryType>(viewed, handle_next_valid_value,
+                                              handle_next_null_value);
   } else {
     const auto& scalar = data.scalar_as<arrow::internal::PrimitiveScalarBase>();
     if (scalar.is_valid) {
-      const std::string_view data = scalar.view();
-      DCHECK_EQ(data.size(), static_cast<size_t>(byte_width_));
+      const std::string_view scalar_data = scalar.view();
+      DCHECK_EQ(scalar_data.size(), static_cast<size_t>(byte_width_));
       for (int64_t i = 0; i < batch_length; i++) {
-        auto& encoded_ptr = *encoded_bytes++;
-        *encoded_ptr++ = kValidByte;
-        memcpy(encoded_ptr, data.data(), data.size());
-        encoded_ptr += byte_width_;
+        handle_next_valid_value(scalar_data);
       }
     } else {
       for (int64_t i = 0; i < batch_length; i++) {
-        auto& encoded_ptr = *encoded_bytes++;
-        *encoded_ptr++ = kNullByte;
-        memset(encoded_ptr, 0, byte_width_);
-        encoded_ptr += byte_width_;
+        handle_next_null_value();
       }
     }
   }
@@ -267,11 +263,11 @@ void RowEncoder::Init(const std::vector<TypeHolder>& column_types, ExecContext*
 
   for (size_t i = 0; i < column_types.size(); ++i) {
     const bool is_extension = column_types[i].id() == Type::EXTENSION;
-    const TypeHolder& type = is_extension
-                                 ? arrow::internal::checked_pointer_cast<ExtensionType>(
-                                       column_types[i].GetSharedPtr())
-                                       ->storage_type()
-                                 : column_types[i];
+    const TypeHolder& type =
+        is_extension
+            ? arrow::internal::checked_cast<const ExtensionType*>(column_types[i].type)
+                  ->storage_type()
+            : column_types[i];
 
     if (is_extension) {
       extension_types_[i] = arrow::internal::checked_pointer_cast<ExtensionType>(
@@ -379,7 +375,7 @@ Result<ExecBatch> RowEncoder::Decode(int64_t num_rows, const int32_t* row_ids) {
       ARROW_ASSIGN_OR_RAISE(out.values[i], ::arrow::internal::GetArrayView(
                                                column_array_data, extension_types_[i]))
     } else {
-      out.values[i] = column_array_data;
+      out.values[i] = std::move(column_array_data);
     }
   }
 
diff --git a/cpp/src/arrow/compute/row/row_encoder_internal.h b/cpp/src/arrow/compute/row/row_encoder_internal.h
index 60eb14af504f7..4d6cc34af2342 100644
--- a/cpp/src/arrow/compute/row/row_encoder_internal.h
+++ b/cpp/src/arrow/compute/row/row_encoder_internal.h
@@ -38,16 +38,41 @@ struct ARROW_EXPORT KeyEncoder {
 
   virtual ~KeyEncoder() = default;
 
+  // Increment the values in the lengths array by the length of the encoded key for the
+  // corresponding value in the given column.
+  //
+  // Generally if Encoder is for a fixed-width type, the length of the encoded key
+  // would add ExtraByteForNull + byte_width.
+  // If Encoder is for a variable-width type, the length would add ExtraByteForNull +
+  // sizeof(Offset) + buffer_size.
+  // If Encoder is for null type, the length would add 0.
   virtual void AddLength(const ExecValue& value, int64_t batch_length,
                          int32_t* lengths) = 0;
 
+  // Increment the length by the length of an encoded null value.
+  // It's a special case for AddLength like `AddLength(Null-Scalar, 1, lengths)`.
   virtual void AddLengthNull(int32_t* length) = 0;
 
+  // Encode the column into the encoded_bytes, which is an array of pointers to each row
+  // buffer.
+  //
+  // If value is an array, the array-size should be batch_length.
+  // If value is a scalar, the value would repeat batch_length times.
+  // NB: The pointers in encoded_bytes will be advanced as values are encoded into them.
   virtual Status Encode(const ExecValue&, int64_t batch_length,
                         uint8_t** encoded_bytes) = 0;
 
+  // Encode a null value into the encoded_bytes, which is an array of pointers to each row
+  // buffer.
+  //
+  // It's a special case for Encode like `Encode(Null-Scalar, 1, encoded_bytes)`.
+  // NB: The pointers in encoded_bytes will be advanced as values are encoded into them.
   virtual void EncodeNull(uint8_t** encoded_bytes) = 0;
 
+  // Decode the encoded key from the encoded_bytes, which is an array of pointers to each
+  // row buffer, into an ArrayData.
+  //
+  // NB: The pointers in encoded_bytes will be advanced as values are decoded from them.
   virtual Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes,
                                                     int32_t length, MemoryPool*) = 0;
 
@@ -94,7 +119,7 @@ struct ARROW_EXPORT FixedWidthKeyEncoder : KeyEncoder {
                                             MemoryPool* pool) override;
 
   std::shared_ptr<DataType> type_;
-  int byte_width_;
+  const int byte_width_;
 };
 
 struct ARROW_EXPORT DictionaryKeyEncoder : FixedWidthKeyEncoder {
@@ -118,6 +143,7 @@ struct ARROW_EXPORT VarLengthKeyEncoder : KeyEncoder {
   void AddLength(const ExecValue& data, int64_t batch_length, int32_t* lengths) override {
     if (data.is_array()) {
       int64_t i = 0;
+      ARROW_DCHECK_EQ(data.array.length, batch_length);
       VisitArraySpanInline<T>(
           data.array,
           [&](std::string_view bytes) {
@@ -142,41 +168,34 @@ struct ARROW_EXPORT VarLengthKeyEncoder : KeyEncoder {
 
   Status Encode(const ExecValue& data, int64_t batch_length,
                 uint8_t** encoded_bytes) override {
+    auto handle_next_valid_value = [&encoded_bytes](std::string_view bytes) {
+      auto& encoded_ptr = *encoded_bytes++;
+      *encoded_ptr++ = kValidByte;
+      util::SafeStore(encoded_ptr, static_cast<Offset>(bytes.size()));
+      encoded_ptr += sizeof(Offset);
+      memcpy(encoded_ptr, bytes.data(), bytes.size());
+      encoded_ptr += bytes.size();
+    };
+    auto handle_next_null_value = [&encoded_bytes]() {
+      auto& encoded_ptr = *encoded_bytes++;
+      *encoded_ptr++ = kNullByte;
+      util::SafeStore(encoded_ptr, static_cast<Offset>(0));
+      encoded_ptr += sizeof(Offset);
+    };
     if (data.is_array()) {
-      VisitArraySpanInline<T>(
-          data.array,
-          [&](std::string_view bytes) {
-            auto& encoded_ptr = *encoded_bytes++;
-            *encoded_ptr++ = kValidByte;
-            util::SafeStore(encoded_ptr, static_cast<Offset>(bytes.size()));
-            encoded_ptr += sizeof(Offset);
-            memcpy(encoded_ptr, bytes.data(), bytes.size());
-            encoded_ptr += bytes.size();
-          },
-          [&] {
-            auto& encoded_ptr = *encoded_bytes++;
-            *encoded_ptr++ = kNullByte;
-            util::SafeStore(encoded_ptr, static_cast<Offset>(0));
-            encoded_ptr += sizeof(Offset);
-          });
+      DCHECK_EQ(data.length(), batch_length);
+      VisitArraySpanInline<T>(data.array, handle_next_valid_value,
+                              handle_next_null_value);
     } else {
       const auto& scalar = data.scalar_as<BaseBinaryScalar>();
       if (scalar.is_valid) {
-        const auto& bytes = *scalar.value;
+        const auto bytes = std::string_view{*scalar.value};
         for (int64_t i = 0; i < batch_length; i++) {
-          auto& encoded_ptr = *encoded_bytes++;
-          *encoded_ptr++ = kValidByte;
-          util::SafeStore(encoded_ptr, static_cast<Offset>(bytes.size()));
-          encoded_ptr += sizeof(Offset);
-          memcpy(encoded_ptr, bytes.data(), bytes.size());
-          encoded_ptr += bytes.size();
+          handle_next_valid_value(bytes);
         }
       } else {
         for (int64_t i = 0; i < batch_length; i++) {
-          auto& encoded_ptr = *encoded_bytes++;
-          *encoded_ptr++ = kNullByte;
-          util::SafeStore(encoded_ptr, static_cast<Offset>(0));
-          encoded_ptr += sizeof(Offset);
+          handle_next_null_value();
         }
       }
     }
@@ -250,6 +269,68 @@ struct ARROW_EXPORT NullKeyEncoder : KeyEncoder {
   }
 };
 
+/// RowEncoder encodes ExecSpan to a variable length byte sequence
+/// created by concatenating the encoded form of each column. The encoding
+/// for each column depends on its data type.
+///
+/// This is used to encode columns into row-major format, which will be
+/// beneficial for grouping and joining operations.
+///
+/// Unlike DuckDB and arrow-rs, currently this row format cannot be used
+/// for sorting because the encoded rows are not comparable.
+///
+/// # Key Column Encoding
+///
+/// The row format is composed of the KeyColumn encodings for each column,
+/// and each column is encoded as follows:
+/// 1. A null byte for each column, indicating whether the column is null.
+///    "1" for null, "0" for non-null.
+/// 2. The "fixed width" encoding for the column, it would exist whether
+///    the column is null or not.
+/// 3. The "variable payload" encoding for the column, it would exist only
+///    for non-null string/binary columns.
+///    For string/binary columns, the length of the payload is in
+///    "fixed width" part, and the binary contents are in the
+///    "variable payload" part.
+/// 4. Specially, if all columns in a row are null, the caller may decide
+///    to refer to kRowIdForNulls instead of actually encoding/decoding
+///    it using any KeyEncoder. See the comment for encoded_nulls_.
+///
+/// The endianness of the encoded bytes is platform-dependent.
+///
+/// ## Null Type
+///
+/// Null Type is a special case, it doesn't occupy any space in the
+/// encoded row.
+///
+/// ## Fixed Width Type
+///
+/// Fixed Width Type is encoded as a fixed-width byte sequence. For example:
+/// ```
+/// Int8: 5, null, 6
+/// ```
+/// Would be encoded as [0 5], [1 0], [0 6].
+///
+/// ### Dictionary Type
+///
+/// Dictionary Type is encoded as a fixed-width byte sequence using
+/// dictionary indices, the dictionary should be identical for all
+/// rows.
+///
+/// ## Variable Width Type
+///
+/// Variable Width Type is encoded as:
+/// [null byte, variable-byte length, variable bytes]. For example:
+///
+/// String "abc" Would be encoded as:
+/// 0 ( 1 byte for not null) + 3 ( 4 bytes for length ) + "abc" (payload)
+///
+/// Null string Would be encoded as:
+/// 1 ( 1 byte for null) + 0 ( 4 bytes for length )
+///
+/// # Row Encoding
+///
+/// The row format is the concatenation of the encodings of each column.
 class ARROW_EXPORT RowEncoder {
  public:
   static constexpr int kRowIdForNulls() { return -1; }
@@ -259,6 +340,9 @@ class ARROW_EXPORT RowEncoder {
   Status EncodeAndAppend(const ExecSpan& batch);
   Result<ExecBatch> Decode(int64_t num_rows, const int32_t* row_ids);
 
+  // Returns the encoded representation of the row at index i.
+  // If i is kRowIdForNulls, it returns the pre-encoded all-nulls
+  // row.
   inline std::string encoded_row(int32_t i) const {
     if (i == kRowIdForNulls()) {
       return std::string(reinterpret_cast<const char*>(encoded_nulls_.data()),
@@ -270,14 +354,26 @@ class ARROW_EXPORT RowEncoder {
   }
 
   int32_t num_rows() const {
-    return offsets_.size() == 0 ? 0 : static_cast<int32_t>(offsets_.size() - 1);
+    return offsets_.empty() ? 0 : static_cast<int32_t>(offsets_.size() - 1);
   }
 
  private:
   ExecContext* ctx_{nullptr};
   std::vector<std::shared_ptr<KeyEncoder>> encoders_;
+  // offsets_ vector stores the starting position (offset) of each encoded row
+  // within the bytes_ vector. This allows for quick access to individual rows.
+  //
+  // The size would be num_rows + 1 if not empty, the last element is the total
+  // length of the bytes_ vector.
   std::vector<int32_t> offsets_;
+  // The encoded bytes of all non "kRowIdForNulls" rows.
   std::vector<uint8_t> bytes_;
+  // A pre-encoded constant row with all its columns being null. Useful when
+  // the caller is certain that an entire row is null and then uses kRowIdForNulls
+  // to refer to it.
+  //
+  // EncodeAndAppend would never append this row, but encoded_row and Decode would
+  // return this row when kRowIdForNulls is passed.
   std::vector<uint8_t> encoded_nulls_;
   std::vector<std::shared_ptr<ExtensionType>> extension_types_;
 };
diff --git a/cpp/src/arrow/compute/row/row_internal.h b/cpp/src/arrow/compute/row/row_internal.h
index 094a9c31efe0a..3ab86fd1fc6ed 100644
--- a/cpp/src/arrow/compute/row/row_internal.h
+++ b/cpp/src/arrow/compute/row/row_internal.h
@@ -38,7 +38,7 @@ struct ARROW_EXPORT RowTableMetadata {
   /// For a fixed-length binary row, common size of rows in bytes,
   /// rounded up to the multiple of alignment.
   ///
-  /// For a varying-length binary, size of all encoded fixed-length key columns,
+  /// For a varying-length binary row, size of all encoded fixed-length key columns,
   /// including lengths of varying-length columns, rounded up to the multiple of string
   /// alignment.
   uint32_t fixed_length;

From 9cafbb26681a1488c16edaec231ba55c21543e3a Mon Sep 17 00:00:00 2001
From: mwish <maplewish117@gmail.com>
Date: Mon, 2 Sep 2024 23:38:24 +0800
Subject: [PATCH 50/63] GH-43768: [C++] Fix the case when boolean_{any|all}
 meets constant input with length in Acero (#43799)

### Rationale for this change

See https://github.com/apache/arrow/issues/43768

### What changes are included in this PR?

Fix the case when boolean_{any|all} meets constant input with length in Acero

### Are these changes tested?

Yes

### Are there any user-facing changes?

no

* GitHub Issue: #43768

Lead-authored-by: mwish <maplewish117@gmail.com>
Co-authored-by: mwish <1506118561@qq.com>
Co-authored-by: Rossi Sun <zanmato1984@gmail.com>
Signed-off-by: mwish <maplewish117@gmail.com>
---
 cpp/src/arrow/acero/aggregate_node_test.cc    | 52 +++++++++++++++++++
 .../arrow/compute/kernels/aggregate_basic.cc  | 16 +++---
 2 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc b/cpp/src/arrow/acero/aggregate_node_test.cc
index d398fb24b73d5..c623271db9fb4 100644
--- a/cpp/src/arrow/acero/aggregate_node_test.cc
+++ b/cpp/src/arrow/acero/aggregate_node_test.cc
@@ -210,5 +210,57 @@ TEST(GroupByNode, NoSkipNulls) {
   AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch}, out_batches.batches);
 }
 
+TEST(ScalarAggregateNode, AnyAll) {
+  // GH-43768: boolean_any and boolean_all with constant input should work well
+  // when min_count != 0.
+  std::shared_ptr<Schema> in_schema = schema({field("not_used", int32())});
+  std::shared_ptr<Schema> out_schema = schema({field("agg_out", boolean())});
+  struct AnyAllCase {
+    std::string batches_json;
+    Expression literal;
+    std::string expected_json;
+    bool skip_nulls = false;
+    uint32_t min_count = 2;
+  };
+  std::vector<AnyAllCase> cases{
+      {"[[42], [42], [42], [42]]", literal(true), "[[true]]"},
+      {"[[42], [42], [42], [42]]", literal(false), "[[false]]"},
+      {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]"},
+      {"[[42]]", literal(true), "[[null]]"},
+      {"[[42], [42], [42]]", literal(true), "[[true]]"},
+      {"[[42], [42], [42]]", literal(true), "[[null]]", /*skip_nulls=*/false,
+       /*min_count=*/4},
+      {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]",
+       /*skip_nulls=*/true},
+  };
+  for (const AnyAllCase& any_all_case : cases) {
+    for (auto func_name : {"any", "all"}) {
+      std::vector<ExecBatch> batches{
+          ExecBatchFromJSON({int32()}, any_all_case.batches_json)};
+      std::vector<Aggregate> aggregates = {
+          Aggregate(func_name,
+                    std::make_shared<compute::ScalarAggregateOptions>(
+                        /*skip_nulls=*/any_all_case.skip_nulls,
+                        /*min_count=*/any_all_case.min_count),
+                    FieldRef("literal"))};
+
+      // Add a projection so that the aggregate input is a Boolean scalar
+      Declaration plan = Declaration::Sequence(
+          {{"exec_batch_source", ExecBatchSourceNodeOptions(in_schema, batches)},
+           {"project", ProjectNodeOptions({any_all_case.literal}, {"literal"})},
+           {"aggregate", AggregateNodeOptions(aggregates)}});
+
+      ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema out_batches,
+                           DeclarationToExecBatches(plan));
+
+      ExecBatch expected_batch =
+          ExecBatchFromJSON({boolean()}, any_all_case.expected_json);
+
+      AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch},
+                                          out_batches.batches);
+    }
+  }
+}
+
 }  // namespace acero
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 1fbcd6a249093..c5e0e6fd6e977 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -532,13 +532,13 @@ struct BooleanAnyImpl : public ScalarAggregator {
     }
     if (batch[0].is_scalar()) {
       const Scalar& scalar = *batch[0].scalar;
-      this->has_nulls = !scalar.is_valid;
-      this->any = scalar.is_valid && checked_cast<const BooleanScalar&>(scalar).value;
-      this->count += scalar.is_valid;
+      this->has_nulls |= !scalar.is_valid;
+      this->any |= scalar.is_valid && checked_cast<const BooleanScalar&>(scalar).value;
+      this->count += scalar.is_valid * batch.length;
       return Status::OK();
     }
     const ArraySpan& data = batch[0].array;
-    this->has_nulls = data.GetNullCount() > 0;
+    this->has_nulls |= data.GetNullCount() > 0;
     this->count += data.length - data.GetNullCount();
     arrow::internal::OptionalBinaryBitBlockCounter counter(
         data.buffers[0].data, data.offset, data.buffers[1].data, data.offset,
@@ -603,13 +603,13 @@ struct BooleanAllImpl : public ScalarAggregator {
     }
     if (batch[0].is_scalar()) {
       const Scalar& scalar = *batch[0].scalar;
-      this->has_nulls = !scalar.is_valid;
-      this->count += scalar.is_valid;
-      this->all = !scalar.is_valid || checked_cast<const BooleanScalar&>(scalar).value;
+      this->has_nulls |= !scalar.is_valid;
+      this->count += scalar.is_valid * batch.length;
+      this->all &= !scalar.is_valid || checked_cast<const BooleanScalar&>(scalar).value;
       return Status::OK();
     }
     const ArraySpan& data = batch[0].array;
-    this->has_nulls = data.GetNullCount() > 0;
+    this->has_nulls |= data.GetNullCount() > 0;
     this->count += data.length - data.GetNullCount();
     arrow::internal::OptionalBinaryBitBlockCounter counter(
         data.buffers[1].data, data.offset, data.buffers[0].data, data.offset,

From 7d4cf37ce656581b123b04214f64ff44826dd83a Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 2 Sep 2024 18:54:51 +0200
Subject: [PATCH 51/63] GH-43883: [CI] Remove Python version guard when
 installing GCS testbench (#43884)

We can now use the GCS testbench even if we are testing a Python version that does not support it.

* GitHub Issue: #43883

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .github/workflows/cpp.yml                     |  2 +-
 ci/docker/conda-cpp.dockerfile                |  2 +-
 .../python-wheel-manylinux-test.dockerfile    | 14 ++++++++---
 ci/scripts/install_gcs_testbench.sh           | 25 +++++++++++--------
 dev/tasks/python-wheels/github.osx.yml        | 16 +++++++++++-
 5 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index fd23e0cf217e6..c3ca66719a5cf 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -472,7 +472,7 @@ jobs:
         shell: msys2 {0}
         env:
           PIPX_BIN_DIR: /usr/local/bin
-          PIPX_PYTHON: ${{ steps.python-install.outputs.python-path }}
+          PIPX_BASE_PYTHON: ${{ steps.python-install.outputs.python-path }}
         run: |
           ci/scripts/install_gcs_testbench.sh default
       - name: Test
diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index eb035d887a158..f0084894e19dc 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -44,7 +44,7 @@ RUN mamba install -q -y \
 
 # We want to install the GCS testbench using the Conda base environment's Python,
 # because the test environment's Python may later change.
-ENV PIPX_PYTHON=/opt/conda/bin/python3
+ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3
 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
 RUN /arrow/ci/scripts/install_gcs_testbench.sh default
 
diff --git a/ci/docker/python-wheel-manylinux-test.dockerfile b/ci/docker/python-wheel-manylinux-test.dockerfile
index 443ff9c53cbcb..09883f9780a36 100644
--- a/ci/docker/python-wheel-manylinux-test.dockerfile
+++ b/ci/docker/python-wheel-manylinux-test.dockerfile
@@ -19,13 +19,19 @@ ARG arch
 ARG python_image_tag
 FROM ${arch}/python:${python_image_tag}
 
-# RUN pip install --upgrade pip
-
 # pandas doesn't provide wheel for aarch64 yet, so cache the compiled
 # test dependencies in a docker image
 COPY python/requirements-wheel-test.txt /arrow/python/
 RUN pip install -r /arrow/python/requirements-wheel-test.txt
 
+# Install the GCS testbench with the system Python
+RUN apt-get update -y -q && \
+    apt-get install -y -q \
+        build-essential \
+        python3-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
-ARG python
-RUN PYTHON_VERSION=${python} /arrow/ci/scripts/install_gcs_testbench.sh default
+ENV PIPX_PYTHON=/usr/bin/python3 PIPX_PIP_ARGS=--prefer-binary
+RUN /arrow/ci/scripts/install_gcs_testbench.sh default
diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh
index 78826e94d3294..48a5858a358c9 100755
--- a/ci/scripts/install_gcs_testbench.sh
+++ b/ci/scripts/install_gcs_testbench.sh
@@ -39,18 +39,21 @@ if [[ "${version}" -eq "default" ]]; then
   version="v0.39.0"
 fi
 
-: ${PIPX_PYTHON:=$(which python3)}
+# The Python to install pipx with
+: ${PIPX_BASE_PYTHON:=$(which python3)}
+# The Python to install the GCS testbench with
+: ${PIPX_PYTHON:=${PIPX_BASE_PYTHON:-$(which python3)}}
 
 export PIP_BREAK_SYSTEM_PACKAGES=1
-${PIPX_PYTHON} -m pip install -U pipx
+${PIPX_BASE_PYTHON} -m pip install -U pipx
 
-# This script is run with PYTHON undefined in some places,
-# but those only use older pythons.
-if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then
-  pipx_flags=--verbose
-  if [[ $(id -un) == "root" ]]; then
-    # Install globally as /root/.local/bin is typically not in $PATH
-    pipx_flags="${pipx_flags} --global"
-  fi
-  ${PIPX_PYTHON} -m pipx install ${pipx_flags} "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
+pipx_flags=(--verbose --python ${PIPX_PYTHON})
+if [[ $(id -un) == "root" ]]; then
+  # Install globally as /root/.local/bin is typically not in $PATH
+  pipx_flags+=(--global)
 fi
+if [[ -n "${PIPX_PIP_ARGS}" ]]; then
+  pipx_flags+=(--pip-args "'${PIPX_PIP_ARGS}'")
+fi
+${PIPX_BASE_PYTHON} -m pipx install ${pipx_flags[@]} \
+  "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index b26aeba32b79b..a65bf9b56addf 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -113,6 +113,21 @@ jobs:
           name: wheel
           path: arrow/python/repaired_wheels/*.whl
 
+      # Use a well-known Python version for the GCS testbench, and avoid
+      # putting it in PATH.
+      - name: Set up Python for GCS testbench
+        uses: actions/setup-python@v5.1.1
+        id: gcs-python-install
+        with:
+          python-version: 3.12
+          update-environment: false
+
+      - name: Install GCS testbench
+        env:
+          PIPX_BIN_DIR: /usr/local/bin
+          PIPX_BASE_PYTHON: {{ '${{ steps.gcs-python-install.outputs.python-path }}' }}
+        run: arrow/ci/scripts/install_gcs_testbench.sh default
+
       - name: Test Wheel
         env:
           PYTEST_ADDOPTS: "-k 'not test_cancellation'"
@@ -121,7 +136,6 @@ jobs:
           source test-env/bin/activate
           pip install --upgrade pip wheel
           arch -{{ arch }} pip install -r arrow/python/requirements-wheel-test.txt
-          PYTHON_VERSION={{ python_version }} arch -{{ arch }} arrow/ci/scripts/install_gcs_testbench.sh default
           arch -{{ arch }} arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
 
       {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}

From 698e0416f1aa4cecc024ed6eb4ab7014375f4966 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 09:55:13 -0700
Subject: [PATCH 52/63] MINOR: [C#] Bump Grpc.Tools from 2.65.0 to 2.66.0 in
 /csharp (#43913)

Bumps [Grpc.Tools](https://github.com/grpc/grpc) from 2.65.0 to 2.66.0.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/grpc/grpc/commits">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Grpc.Tools&package-manager=nuget&previous-version=2.65.0&new-version=2.66.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher <curt@hagenlocher.org>
---
 .../src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj  | 2 +-
 csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj
index 1870888184906..ec438fde843f4 100644
--- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj
+++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj
@@ -5,7 +5,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Grpc.Tools" Version="2.65.0" PrivateAssets="All" />
+    <PackageReference Include="Grpc.Tools" Version="2.66.0" PrivateAssets="All" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
index 9e1866f84160b..afe7d39194211 100644
--- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
+++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
@@ -7,7 +7,7 @@
   <ItemGroup>
     <PackageReference Include="Google.Protobuf" Version="3.27.3" />
     <PackageReference Include="Grpc.Net.Client" Version="2.65.0" />
-    <PackageReference Include="Grpc.Tools" Version="2.65.0" PrivateAssets="All" />
+    <PackageReference Include="Grpc.Tools" Version="2.66.0" PrivateAssets="All" />
     <PackageReference Include="System.Memory" Version="4.5.5" />
   </ItemGroup>
 

From fa2edd468c986f0deca0e0411b26a7d2058aa5d1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 09:55:48 -0700
Subject: [PATCH 53/63] MINOR: [C#] Bump Google.Protobuf from 3.27.3 to 3.28.0
 in /csharp (#43914)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[//]: # (dependabot-start)
⚠️  **Dependabot is rebasing this PR** ⚠️

Rebasing might not happen immediately, so don't worry if this takes some time.

Note: if you make any changes to this PR yourself, they will take precedence over the rebase.

---

[//]: # (dependabot-end)

Bumps [Google.Protobuf](https://github.com/protocolbuffers/protobuf) from 3.27.3 to 3.28.0.
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/439c42c735ae1efed57ab7771986f2a3c0b99319"><code>439c42c</code></a> Updating version.json and repo version numbers to: 28.0</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/c9454f40e91bef6187e618f4856ebea240985c81"><code>c9454f4</code></a> Remove <code>--copt=&quot;-Werror&quot;</code> from <code>.bazelrc</code> (<a href="https://redirect.github.com/protocolbuffers/protobuf/issues/18005">#18005</a>)</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/f5a1b178ad52c3e64da40caceaa4ca9e51045cb4"><code>f5a1b17</code></a> Move -Werror to our test/dev bazelrc files. (<a href="https://redirect.github.com/protocolbuffers/protobuf/issues/17938">#17938</a>)</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/0c9e14a9eb880747c94dc5eef31be73db7cf2526"><code>0c9e14a</code></a> Merge pull request <a href="https://redirect.github.com/protocolbuffers/protobuf/issues/17917">#17917</a> from thomasvl/patch_objc_to_28</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/6a6ebe4b1c50c5ab1b32f8e55c0a98797a565ecc"><code>6a6ebe4</code></a> Merge pull request <a href="https://redirect.github.com/protocolbuffers/protobuf/issues/17919">#17919</a> from protocolbuffers/28.x-202408221734</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/09ba2bb826c9fafa0f0f49af9cc52d6ce1a5fcdb"><code>09ba2bb</code></a> Updating version.json and repo version numbers to: 28.0-dev</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/e340f52e461bf2726acb9fd1e0c88a88762aaf87"><code>e340f52</code></a> Updating version.json and repo version numbers to: 28.0-rc3</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/b2764205e943d9bc912c4504d95117179e9b38e1"><code>b276420</code></a> [ObjC] Issue stderr warnings for deprecated generation options.</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/13f850d92a522b330ef9a665d38bc5b6647ea8f3"><code>13f850d</code></a> Merge pull request <a href="https://redirect.github.com/protocolbuffers/protobuf/issues/17913">#17913</a> from protocolbuffers/cp-compat-upgrade</li>
<li><a href="https://github.com/protocolbuffers/protobuf/commit/6bf01c51a0b92278958f0169d330d64a08dbb4ec"><code>6bf01c5</code></a> Binary compatibility shims for GeneratedMessageV3, SingleFieldBuilderV3, Repe...</li>
<li>Additional commits viewable in <a href="https://github.com/protocolbuffers/protobuf/compare/v3.27.3...v3.28.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Google.Protobuf&package-manager=nuget&previous-version=3.27.3&new-version=3.28.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher <curt@hagenlocher.org>
---
 csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj       | 2 +-
 .../Apache.Arrow.Flight.TestWeb.csproj                          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
index afe7d39194211..bcfb813c11435 100644
--- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
+++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
@@ -5,7 +5,7 @@
   </PropertyGroup>
   
   <ItemGroup>
-    <PackageReference Include="Google.Protobuf" Version="3.27.3" />
+    <PackageReference Include="Google.Protobuf" Version="3.28.0" />
     <PackageReference Include="Grpc.Net.Client" Version="2.65.0" />
     <PackageReference Include="Grpc.Tools" Version="2.66.0" PrivateAssets="All" />
     <PackageReference Include="System.Memory" Version="4.5.5" />
diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
index 14227e2c4eb6b..5ed7cc47d6ac2 100644
--- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
+++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj
@@ -5,7 +5,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Google.Protobuf" Version="3.27.3" />
+    <PackageReference Include="Google.Protobuf" Version="3.28.0" />
     <PackageReference Include="Grpc.AspNetCore" Version="2.65.0" />
   </ItemGroup>
 

From 3a6135b66d511296281c1389063bff060a8b83e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= <raulcumplido@gmail.com>
Date: Tue, 3 Sep 2024 00:37:03 +0200
Subject: [PATCH 54/63] GH-40216: [CI][Packaging][Python] Upload pyarrow
 nightly wheels to scientific python channel on Anaconda (#43862)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

As discussed in the main issue, it is useful for discoverability to have the wheels uploaded to the nightly channel.

### What changes are included in this PR?

Added macro to upload wheel to scientific python channel

### Are these changes tested?

Via archery

### Are there any user-facing changes?

No, but nightly wheels will be available on the scientific-python channel.
* GitHub Issue: #40216

Authored-by: Raúl Cumplido <raulcumplido@gmail.com>
Signed-off-by: Jacob Wujciak-Jens <jacob@wujciak.de>
---
 dev/tasks/macros.jinja                     | 12 ++++++++++++
 dev/tasks/python-sdist/github.yml          |  1 +
 dev/tasks/python-wheels/github.linux.yml   |  1 +
 dev/tasks/python-wheels/github.osx.yml     |  1 +
 dev/tasks/python-wheels/github.windows.yml |  1 +
 dev/tasks/tasks.yml                        |  1 +
 6 files changed, 17 insertions(+)

diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index df55f32222e91..63cb2fc6dd101 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -183,6 +183,18 @@ env:
   {% endif %}
 {% endmacro %}
 
+{%- macro github_upload_wheel_scientific_python(pattern) -%}
+  {%- if arrow.is_default_branch() -%}
+  - name: Upload wheel to Anaconda scientific-python
+    shell: bash
+    run: |
+      python3 -m pip install git+https://github.com/Anaconda-Platform/anaconda-client.git@1.12.3
+      anaconda -t ${CROSSBOW_SCIENTIFIC_PYTHON_UPLOAD_TOKEN} upload --force -u scientific-python-nightly-wheels --label dev {{ pattern }}
+    env:
+      CROSSBOW_SCIENTIFIC_PYTHON_UPLOAD_TOKEN: {{ '${{ secrets.CROSSBOW_SCIENTIFIC_PYTHON_UPLOAD_TOKEN }}' }}
+  {% endif %}
+{% endmacro %}
+
 {%- macro azure_checkout_arrow() -%}
   - script: |
       git clone --no-checkout --branch {{ arrow.branch }} {{ arrow.remote }} arrow
diff --git a/dev/tasks/python-sdist/github.yml b/dev/tasks/python-sdist/github.yml
index ef36e358aa926..ce41f437946a7 100644
--- a/dev/tasks/python-sdist/github.yml
+++ b/dev/tasks/python-sdist/github.yml
@@ -43,3 +43,4 @@ jobs:
 
       {{ macros.github_upload_releases("arrow/python/dist/*.tar.gz")|indent }}
       {{ macros.github_upload_gemfury("arrow/python/dist/*.tar.gz")|indent }}
+      {{ macros.github_upload_wheel_scientific_python("arrow/python/dist/*.tar.gz")|indent }}
diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml
index 97746ba3f9b8b..f9df27ba3175b 100644
--- a/dev/tasks/python-wheels/github.linux.yml
+++ b/dev/tasks/python-wheels/github.linux.yml
@@ -110,6 +110,7 @@ jobs:
 
       {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
       {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
+      {{ macros.github_upload_wheel_scientific_python("arrow/python/repaired_wheels/*.whl")|indent }}
 
       {% if arrow.is_default_branch() %}
       - name: Push Docker Image
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index a65bf9b56addf..98e06a14ff222 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -140,3 +140,4 @@ jobs:
 
       {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
       {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
+      {{ macros.github_upload_wheel_scientific_python("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/dev/tasks/python-wheels/github.windows.yml b/dev/tasks/python-wheels/github.windows.yml
index a40b9c0d65103..3a943b6ae515c 100644
--- a/dev/tasks/python-wheels/github.windows.yml
+++ b/dev/tasks/python-wheels/github.windows.yml
@@ -71,6 +71,7 @@ jobs:
 
       {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
       {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
+      {{ macros.github_upload_wheel_scientific_python("arrow/python/dist/*.whl")|indent }}
 
       {% if arrow.is_default_branch() %}
       - name: Push Docker Image
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index c6d2f2175d44c..b7e0c1601e336 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -29,6 +29,7 @@ groups:
 
   wheel:
     - wheel-*
+    - python-sdist
 
   linux:
     - almalinux-*

From 00d357674002b4e2e08b9d76b5d52530e723c4eb Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 3 Sep 2024 10:08:33 +0900
Subject: [PATCH 55/63] GH-43746: [C++] Add support for Boost 1.86 (#43766)

### Rationale for this change

`boost/process/*.hpp` has been deprecated since Boost 1.86, and that release also seems to introduce a backward-incompatible change. We need to use `boost/process/v2/*.hpp` instead.

### What changes are included in this PR?

This introduces `arrow::util::Process` for testing. It wraps the boost/process/ API, so we don't need to use the boost/process/ API directly in our tests.

We still use the v1 API on Windows because the v2 API doesn't support process groups and we don't have a workaround for that on Windows. If GCS's testbench doesn't use multiple processes, we can use the v2 API on Windows because we then don't need process groups in our use case.

See also:
* The v2 API and process group: https://github.com/boostorg/process/issues/259
* GCS's testbench and multiple processes: https://github.com/googleapis/storage-testbench/issues/669

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* GitHub Issue: #43746

Lead-authored-by: Sutou Kouhei <kou@clear-code.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/cpp.yml                   |   4 +-
 .github/workflows/ruby.yml                  |   5 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake |  48 +++-
 cpp/src/arrow/CMakeLists.txt                |  13 +-
 cpp/src/arrow/filesystem/CMakeLists.txt     |  18 +-
 cpp/src/arrow/filesystem/azurefs_test.cc    |  52 +---
 cpp/src/arrow/filesystem/gcsfs_test.cc      | 106 +++----
 cpp/src/arrow/filesystem/s3_test_util.cc    |  69 +----
 cpp/src/arrow/flight/CMakeLists.txt         |   5 -
 cpp/src/arrow/flight/flight_benchmark.cc    |   2 +-
 cpp/src/arrow/flight/flight_test.cc         |   4 +-
 cpp/src/arrow/flight/test_util.cc           | 118 ++------
 cpp/src/arrow/flight/test_util.h            |  17 +-
 cpp/src/arrow/testing/process.cc            | 298 ++++++++++++++++++++
 cpp/src/arrow/testing/process.h             |  46 +++
 cpp/src/gandiva/precompiled/CMakeLists.txt  |  26 +-
 cpp/vcpkg.json                              |   2 +-
 17 files changed, 500 insertions(+), 333 deletions(-)
 create mode 100644 cpp/src/arrow/testing/process.cc
 create mode 100644 cpp/src/arrow/testing/process.h

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index c3ca66719a5cf..d51438c5f193a 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -409,12 +409,10 @@ jobs:
       ARROW_WITH_SNAPPY: ON
       ARROW_WITH_ZLIB: ON
       ARROW_WITH_ZSTD: ON
-      # Don't use preinstalled Boost by empty BOOST_ROOT and
-      # -DBoost_NO_BOOST_CMAKE=ON
+      # Don't use preinstalled Boost by empty BOOST_ROOT
       BOOST_ROOT: ""
       ARROW_CMAKE_ARGS: >-
         -DARROW_PACKAGE_PREFIX=/${{ matrix.msystem_lower}}
-        -DBoost_NO_BOOST_CMAKE=ON
         -DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON
       # We can't use unity build because we don't have enough memory on
       # GitHub Actions.
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index e4d650e74a8ad..4b74b8d7fc84d 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -406,7 +406,10 @@ jobs:
             -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json"
       - name: Build C++ vcpkg dependencies
         run: |
-          vcpkg\vcpkg.exe install --triplet $env:VCPKG_TRIPLET --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed
+          vcpkg\vcpkg.exe install `
+            --triplet $env:VCPKG_TRIPLET `
+            --x-manifest-root cpp `
+            --x-install-root build\cpp\vcpkg_installed
       - name: Build C++
         shell: cmd
         run: |
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 63e2c036c9a6f..b31037a973279 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -259,7 +259,7 @@ macro(resolve_dependency DEPENDENCY_NAME)
       IS_RUNTIME_DEPENDENCY
       REQUIRED_VERSION
       USE_CONFIG)
-  set(multi_value_args COMPONENTS PC_PACKAGE_NAMES)
+  set(multi_value_args COMPONENTS OPTIONAL_COMPONENTS PC_PACKAGE_NAMES)
   cmake_parse_arguments(ARG
                         "${options}"
                         "${one_value_args}"
@@ -287,6 +287,9 @@ macro(resolve_dependency DEPENDENCY_NAME)
   if(ARG_COMPONENTS)
     list(APPEND FIND_PACKAGE_ARGUMENTS COMPONENTS ${ARG_COMPONENTS})
   endif()
+  if(ARG_OPTIONAL_COMPONENTS)
+    list(APPEND FIND_PACKAGE_ARGUMENTS OPTIONAL_COMPONENTS ${ARG_OPTIONAL_COMPONENTS})
+  endif()
   if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO")
     find_package(${FIND_PACKAGE_ARGUMENTS})
     set(COMPATIBLE ${${PACKAGE_NAME}_FOUND})
@@ -1289,15 +1292,19 @@ if(ARROW_USE_BOOST)
     set(Boost_USE_STATIC_LIBS ON)
   endif()
   if(ARROW_BOOST_REQUIRE_LIBRARY)
-    set(ARROW_BOOST_COMPONENTS system filesystem)
+    set(ARROW_BOOST_COMPONENTS filesystem system)
+    set(ARROW_BOOST_OPTIONAL_COMPONENTS process)
   else()
     set(ARROW_BOOST_COMPONENTS)
+    set(ARROW_BOOST_OPTIONAL_COMPONENTS)
   endif()
   resolve_dependency(Boost
                      REQUIRED_VERSION
                      ${ARROW_BOOST_REQUIRED_VERSION}
                      COMPONENTS
                      ${ARROW_BOOST_COMPONENTS}
+                     OPTIONAL_COMPONENTS
+                     ${ARROW_BOOST_OPTIONAL_COMPONENTS}
                      IS_RUNTIME_DEPENDENCY
                      # libarrow.so doesn't depend on libboost*.
                      FALSE)
@@ -1316,14 +1323,35 @@ if(ARROW_USE_BOOST)
     endif()
   endforeach()
 
-  if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-    # boost/process/detail/windows/handle_workaround.hpp doesn't work
-    # without BOOST_USE_WINDOWS_H with MinGW because MinGW doesn't
-    # provide __kernel_entry without winternl.h.
-    #
-    # See also:
-    # https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp
-    target_compile_definitions(Boost::headers INTERFACE "BOOST_USE_WINDOWS_H=1")
+  if(TARGET Boost::process)
+    # Boost >= 1.86
+    target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V1")
+    target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2")
+  else()
+    # Boost < 1.86
+    add_library(Boost::process INTERFACE IMPORTED)
+    if(TARGET Boost::filesystem)
+      target_link_libraries(Boost::process INTERFACE Boost::filesystem)
+    endif()
+    if(TARGET Boost::system)
+      target_link_libraries(Boost::process INTERFACE Boost::system)
+    endif()
+    if(TARGET Boost::headers)
+      target_link_libraries(Boost::process INTERFACE Boost::headers)
+    endif()
+    if(Boost_VERSION VERSION_GREATER_EQUAL 1.80)
+      target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2")
+      # Boost < 1.86 has a bug that
+      # boost::process::v2::process_environment::on_setup() isn't
+      # defined. We need to build Boost Process source to define it.
+      #
+      # See also:
+      # https://github.com/boostorg/process/issues/312
+      target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_NEED_SOURCE")
+      if(WIN32)
+        target_link_libraries(Boost::process INTERFACE bcrypt ntdll)
+      endif()
+    endif()
   endif()
 
   message(STATUS "Boost include dir: ${Boost_INCLUDE_DIRS}")
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 65343df1291ba..01ac813f4713b 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -644,9 +644,13 @@ else()
 endif()
 
 set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST})
-set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON)
-set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static
-                                   ${ARROW_GTEST_GTEST})
+set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process)
+set(ARROW_TESTING_STATIC_LINK_LIBS
+    arrow::flatbuffers
+    RapidJSON
+    Boost::process
+    arrow_static
+    ${ARROW_GTEST_GTEST})
 set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared)
 set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static)
 # that depend on gtest
@@ -667,9 +671,10 @@ set(ARROW_TESTING_SRCS
     io/test_common.cc
     ipc/test_common.cc
     testing/fixed_width_test_util.cc
+    testing/generator.cc
     testing/gtest_util.cc
+    testing/process.cc
     testing/random.cc
-    testing/generator.cc
     testing/util.cc)
 
 #
diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt
index dec4bb6e3d465..7afdf566f2fb5 100644
--- a/cpp/src/arrow/filesystem/CMakeLists.txt
+++ b/cpp/src/arrow/filesystem/CMakeLists.txt
@@ -47,9 +47,7 @@ if(ARROW_GCS)
                  EXTRA_LABELS
                  filesystem
                  EXTRA_LINK_LIBS
-                 google-cloud-cpp::storage
-                 Boost::filesystem
-                 Boost::system)
+                 google-cloud-cpp::storage)
 endif()
 
 if(ARROW_AZURE)
@@ -57,9 +55,7 @@ if(ARROW_AZURE)
                  EXTRA_LABELS
                  filesystem
                  EXTRA_LINK_LIBS
-                 ${AZURE_SDK_LINK_LIBRARIES}
-                 Boost::filesystem
-                 Boost::system)
+                 ${AZURE_SDK_LINK_LIBRARIES})
 endif()
 
 if(ARROW_S3)
@@ -75,11 +71,7 @@ if(ARROW_S3)
   else()
     list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_static)
   endif()
-  list(APPEND
-       ARROW_S3_TEST_EXTRA_LINK_LIBS
-       ${AWSSDK_LINK_LIBRARIES}
-       Boost::filesystem
-       Boost::system)
+  list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS ${AWSSDK_LINK_LIBRARIES})
   add_arrow_test(s3fs_test
                  SOURCES
                  s3fs_test.cc
@@ -122,9 +114,7 @@ if(ARROW_S3)
                         s3_test_util.cc
                         STATIC_LINK_LIBS
                         ${AWSSDK_LINK_LIBRARIES}
-                        ${ARROW_BENCHMARK_LINK_LIBS}
-                        Boost::filesystem
-                        Boost::system)
+                        ${ARROW_BENCHMARK_LINK_LIBS})
     if(ARROW_TEST_LINKAGE STREQUAL "static")
       target_link_libraries(arrow-filesystem-s3fs-benchmark PRIVATE parquet_static)
     else()
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index 9d437d1f83aac..a8dc923476752 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -15,24 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <algorithm>  // Missing include in boost/process
-
-// This boost/asio/io_context.hpp include is needless for no MinGW
-// build.
-//
-// This is for including boost/asio/detail/socket_types.hpp before any
-// "#include <windows.h>". boost/asio/detail/socket_types.hpp doesn't
-// work if windows.h is already included. boost/process.h ->
-// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp
-// includes windows.h. boost/process/args.hpp is included before
-// boost/process/async.h that includes
-// boost/asio/detail/socket_types.hpp implicitly is included.
-#include <boost/asio/io_context.hpp>
-// We need BOOST_USE_WINDOWS_H definition with MinGW when we use
-// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in
-// cpp/cmake_modules/ThirdpartyToolchain.cmake for details.
-#include <boost/process.hpp>
-
 #include "arrow/filesystem/azurefs.h"
 #include "arrow/filesystem/azurefs_internal.h"
 
@@ -53,6 +35,7 @@
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/process.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/future.h"
 #include "arrow/util/io_util.h"
@@ -67,7 +50,6 @@ namespace arrow {
 using internal::TemporaryDir;
 namespace fs {
 using internal::ConcatAbstractPath;
-namespace bp = boost::process;
 
 using ::testing::IsEmpty;
 using ::testing::Not;
@@ -174,42 +156,32 @@ class AzuriteEnv : public AzureEnvImpl<AzuriteEnv> {
  private:
   std::unique_ptr<TemporaryDir> temp_dir_;
   arrow::internal::PlatformFilename debug_log_path_;
-  bp::child server_process_;
+  std::unique_ptr<util::Process> server_process_;
 
   using AzureEnvImpl::AzureEnvImpl;
 
  public:
   static const AzureBackend kBackend = AzureBackend::kAzurite;
 
-  ~AzuriteEnv() override {
-    server_process_.terminate();
-    server_process_.wait();
-  }
+  ~AzuriteEnv() = default;
 
   static Result<std::unique_ptr<AzureEnvImpl>> Make() {
     auto self = std::unique_ptr<AzuriteEnv>(
         new AzuriteEnv("devstoreaccount1",
                        "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/"
                        "K1SZFPTOtr/KBHBeksoGMGw=="));
-    auto exe_path = bp::search_path("azurite");
-    if (exe_path.empty()) {
-      return Status::Invalid("Could not find Azurite emulator.");
-    }
+    self->server_process_ = std::make_unique<util::Process>();
+    ARROW_RETURN_NOT_OK(self->server_process_->SetExecutable("azurite"));
     ARROW_ASSIGN_OR_RAISE(self->temp_dir_, TemporaryDir::Make("azurefs-test-"));
     ARROW_ASSIGN_OR_RAISE(self->debug_log_path_,
                           self->temp_dir_->path().Join("debug.log"));
-    auto server_process = bp::child(
-        boost::this_process::environment(), exe_path, "--silent", "--location",
-        self->temp_dir_->path().ToString(), "--debug", self->debug_log_path_.ToString(),
-        // For old Azurite. We can't install the latest Azurite with
-        // old Node.js on old Ubuntu.
-        "--skipApiVersionCheck");
-    if (!server_process.valid() || !server_process.running()) {
-      server_process.terminate();
-      server_process.wait();
-      return Status::Invalid("Could not start Azurite emulator.");
-    }
-    self->server_process_ = std::move(server_process);
+    self->server_process_->SetArgs({"--silent", "--location",
+                                    self->temp_dir_->path().ToString(), "--debug",
+                                    self->debug_log_path_.ToString(),
+                                    // For old Azurite. We can't install the latest
+                                    // Azurite with old Node.js on old Ubuntu.
+                                    "--skipApiVersionCheck"});
+    ARROW_RETURN_NOT_OK(self->server_process_->Execute());
     return self;
   }
 
diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc
index 2098cf4d7f319..d4d5edf4b8993 100644
--- a/cpp/src/arrow/filesystem/gcsfs_test.cc
+++ b/cpp/src/arrow/filesystem/gcsfs_test.cc
@@ -15,26 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <algorithm>  // Missing include in boost/process
-
-#define BOOST_NO_CXX98_FUNCTION_BASE  // ARROW-17805
-// This boost/asio/io_context.hpp include is needless for no MinGW
-// build.
-//
-// This is for including boost/asio/detail/socket_types.hpp before any
-// "#include <windows.h>". boost/asio/detail/socket_types.hpp doesn't
-// work if windows.h is already included. boost/process.h ->
-// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp
-// includes windows.h. boost/process/args.hpp is included before
-// boost/process/async.h that includes
-// boost/asio/detail/socket_types.hpp implicitly is included.
-#include <boost/asio/io_context.hpp>
-// We need BOOST_USE_WINDOWS_H definition with MinGW when we use
-// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in
-// cpp/cmake_modules/ThirdpartyToolchain.cmake for details.
-#include <boost/process.hpp>
-#include <boost/thread.hpp>
-
 #include "arrow/filesystem/gcsfs.h"
 
 #include <absl/time/time.h>
@@ -45,16 +25,15 @@
 #include <google/cloud/storage/options.h>
 #include <gtest/gtest.h>
 
-#include <array>
 #include <random>
 #include <string>
-#include <thread>
 
 #include "arrow/filesystem/gcsfs_internal.h"
 #include "arrow/filesystem/path_util.h"
 #include "arrow/filesystem/test_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/matchers.h"
+#include "arrow/testing/process.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/future.h"
 #include "arrow/util/key_value_metadata.h"
@@ -64,7 +43,6 @@ namespace fs {
 
 namespace {
 
-namespace bp = boost::process;
 namespace gc = google::cloud;
 namespace gcs = google::cloud::storage;
 
@@ -89,70 +67,62 @@ class GcsTestbench : public ::testing::Environment {
  public:
   GcsTestbench() {
     port_ = std::to_string(GetListenPort());
-    std::vector<std::string> names{"python3", "python"};
-    // If the build script or application developer provides a value in the PYTHON
-    // environment variable, then just use that.
-    if (const auto* env = std::getenv("PYTHON")) {
-      names = {env};
-    }
     auto error = std::string("Could not start GCS emulator 'storage-testbench'");
+    auto server_process = std::make_unique<util::Process>();
+    auto status = server_process->SetExecutable("storage-testbench");
+    if (!status.ok()) {
+      error += ": " + status.ToString();
+      error_ = std::move(error);
+      return;
+    }
 
-    auto testbench_is_running = [](bp::child& process, bp::ipstream& output) {
-      // Wait for message: "* Restarting with"
-      std::string line;
+    server_process->SetArgs({"--port", port_});
+    server_process->IgnoreStderr();
+    status = server_process->Execute();
+    if (!status.ok()) {
+      error += ": " + status.ToString();
+      error_ = std::move(error);
+      return;
+    }
+
+    auto testbench_is_running = [&server_process, this]() {
+      auto ready_timeout = std::chrono::seconds(10);
       std::chrono::time_point<std::chrono::steady_clock> end =
-          std::chrono::steady_clock::now() + std::chrono::seconds(10);
-      while (process.valid() && process.running() &&
-             std::chrono::steady_clock::now() < end) {
-        if (output.peek() && std::getline(output, line)) {
-          std::cerr << line << std::endl;
-          if (line.find("* Restarting with") != std::string::npos) return true;
-        } else {
-          std::this_thread::sleep_for(std::chrono::milliseconds(20));
+          std::chrono::steady_clock::now() + ready_timeout;
+      while (server_process->IsRunning() && std::chrono::steady_clock::now() < end) {
+        auto client = gcs::Client(
+            google::cloud::Options{}
+                .set<gcs::RestEndpointOption>("http://127.0.0.1:" + port_)
+                .set<gc::UnifiedCredentialsOption>(gc::MakeInsecureCredentials())
+                .set<gcs::RetryPolicyOption>(
+                    gcs::LimitedTimeRetryPolicy(ready_timeout).clone()));
+        auto metadata = client.GetBucketMetadata("nonexistent");
+        if (metadata.status().code() == google::cloud::StatusCode::kNotFound) {
+          return true;
         }
       }
       return false;
     };
 
-    auto exe_path = bp::search_path("storage-testbench");
-    if (!exe_path.empty()) {
-      bp::ipstream output;
-      server_process_ =
-          bp::child(exe_path, "--port", port_, group_, bp::std_err > output);
-      if (!testbench_is_running(server_process_, output)) {
-        error += " (failed to start)";
-        server_process_.terminate();
-        server_process_.wait();
-      }
-    } else {
-      error += " (exe not found)";
-    }
-    if (!server_process_.valid()) {
+    if (!testbench_is_running()) {
+      error += " (failed to listen)";
       error_ = std::move(error);
+      return;
     }
+
+    server_process_ = std::move(server_process);
   }
 
-  bool running() { return server_process_.running(); }
+  bool running() { return server_process_ && server_process_->IsRunning(); }
 
-  ~GcsTestbench() override {
-    // Brutal shutdown, kill the full process group because the GCS testbench may launch
-    // additional children.
-    try {
-      group_.terminate();
-    } catch (bp::process_error&) {
-    }
-    if (server_process_.valid()) {
-      server_process_.wait();
-    }
-  }
+  ~GcsTestbench() = default;
 
   const std::string& port() const { return port_; }
   const std::string& error() const { return error_; }
 
  private:
   std::string port_;
-  bp::child server_process_;
-  bp::group group_;
+  std::unique_ptr<util::Process> server_process_;
   std::string error_;
 };
 
diff --git a/cpp/src/arrow/filesystem/s3_test_util.cc b/cpp/src/arrow/filesystem/s3_test_util.cc
index eb29a677dae9e..003afa68f1e35 100644
--- a/cpp/src/arrow/filesystem/s3_test_util.cc
+++ b/cpp/src/arrow/filesystem/s3_test_util.cc
@@ -15,33 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <algorithm>  // Missing include in boost/process
-
 #ifndef _WIN32
 #include <sys/wait.h>
 #endif
 
-// This boost/asio/io_context.hpp include is needless for no MinGW
-// build.
-//
-// This is for including boost/asio/detail/socket_types.hpp before any
-// "#include <windows.h>". boost/asio/detail/socket_types.hpp doesn't
-// work if windows.h is already included. boost/process.h ->
-// boost/process/args.hpp -> boost/process/detail/basic_cmd.hpp
-// includes windows.h. boost/process/args.hpp is included before
-// boost/process/async.h that includes
-// boost/asio/detail/socket_types.hpp implicitly is included.
-#ifdef __MINGW32__
-#include <boost/asio/io_context.hpp>
-#endif
-#define BOOST_NO_CXX98_FUNCTION_BASE  // ARROW-17805
-// We need BOOST_USE_WINDOWS_H definition with MinGW when we use
-// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in
-// cpp/cmake_modules/ThirdpartyToolchain.cmake for details.
-#include <boost/process.hpp>
-
 #include "arrow/filesystem/s3_test_util.h"
 #include "arrow/filesystem/s3fs.h"
+#include "arrow/testing/process.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/async_generator.h"
 #include "arrow/util/future.h"
@@ -53,8 +33,6 @@ namespace fs {
 
 using ::arrow::internal::TemporaryDir;
 
-namespace bp = boost::process;
-
 namespace {
 
 const char* kMinioExecutableName = "minio";
@@ -75,7 +53,7 @@ struct MinioTestServer::Impl {
   std::string connect_string_;
   std::string access_key_ = kMinioAccessKey;
   std::string secret_key_ = kMinioSecretKey;
-  std::shared_ptr<::boost::process::child> server_process_;
+  std::unique_ptr<util::Process> server_process_;
 };
 
 MinioTestServer::MinioTestServer() : impl_(new Impl) {}
@@ -105,44 +83,23 @@ Status MinioTestServer::Start() {
 
   ARROW_ASSIGN_OR_RAISE(impl_->temp_dir_, TemporaryDir::Make("s3fs-test-"));
 
-  // Get a copy of the current environment.
-  // (NOTE: using "auto" would return a native_environment that mutates
-  //  the current environment)
-  bp::environment env = boost::this_process::environment();
-  env["MINIO_ACCESS_KEY"] = kMinioAccessKey;
-  env["MINIO_SECRET_KEY"] = kMinioSecretKey;
+  impl_->server_process_ = std::make_unique<util::Process>();
+  impl_->server_process_->SetEnv("MINIO_ACCESS_KEY", kMinioAccessKey);
+  impl_->server_process_->SetEnv("MINIO_SECRET_KEY", kMinioSecretKey);
   // Disable the embedded console (one less listening address to care about)
-  env["MINIO_BROWSER"] = "off";
-
+  impl_->server_process_->SetEnv("MINIO_BROWSER", "off");
   impl_->connect_string_ = GenerateConnectString();
-  auto exe_path = bp::search_path(kMinioExecutableName);
-  if (exe_path.empty()) {
-    return Status::IOError("Failed to find minio executable ('", kMinioExecutableName,
-                           "') in PATH");
-  }
-
-  try {
-    // NOTE: --quiet makes startup faster by suppressing remote version check
-    impl_->server_process_ = std::make_shared<bp::child>(
-        env, exe_path, "server", "--quiet", "--compat", "--address",
-        impl_->connect_string_, impl_->temp_dir_->path().ToString());
-  } catch (const std::exception& e) {
-    return Status::IOError("Failed to launch Minio server: ", e.what());
-  }
+  ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName));
+  // NOTE: --quiet makes startup faster by suppressing remote version check
+  impl_->server_process_->SetArgs({"server", "--quiet", "--compat", "--address",
+                                   impl_->connect_string_,
+                                   impl_->temp_dir_->path().ToString()});
+  ARROW_RETURN_NOT_OK(impl_->server_process_->Execute());
   return Status::OK();
 }
 
 Status MinioTestServer::Stop() {
-  if (impl_->server_process_ && impl_->server_process_->valid()) {
-    // Brutal shutdown
-    impl_->server_process_->terminate();
-    impl_->server_process_->wait();
-#ifndef _WIN32
-    // Despite calling wait() above, boost::process fails to clear zombies
-    // so do it ourselves.
-    waitpid(impl_->server_process_->id(), nullptr, 0);
-#endif
-  }
+  impl_->server_process_ = nullptr;
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt
index 835c4fc83bf18..b12476ac3893a 100644
--- a/cpp/src/arrow/flight/CMakeLists.txt
+++ b/cpp/src/arrow/flight/CMakeLists.txt
@@ -70,11 +70,6 @@ if(ARROW_BUILD_BENCHMARKS
     endif()
   endif()
 endif()
-list(APPEND
-     ARROW_FLIGHT_TEST_INTERFACE_LIBS
-     Boost::headers
-     Boost::filesystem
-     Boost::system)
 list(APPEND ARROW_FLIGHT_TEST_LINK_LIBS gRPC::grpc++)
 
 # TODO(wesm): Protobuf shared vs static linking
diff --git a/cpp/src/arrow/flight/flight_benchmark.cc b/cpp/src/arrow/flight/flight_benchmark.cc
index 057ef15c3c7ae..661c47737f024 100644
--- a/cpp/src/arrow/flight/flight_benchmark.cc
+++ b/cpp/src/arrow/flight/flight_benchmark.cc
@@ -491,7 +491,7 @@ int main(int argc, char** argv) {
         if (FLAGS_cuda && FLAGS_test_put) {
           server_args.push_back("-cuda");
         }
-        server->Start(server_args);
+        ABORT_NOT_OK(server->Start(server_args));
       }
       std::cout << "Server host: " << FLAGS_server_host << std::endl
                 << "Server port: " << FLAGS_server_port << std::endl;
diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc
index 3d52bc3f5ae06..6425233dadec4 100644
--- a/cpp/src/arrow/flight/flight_test.cc
+++ b/cpp/src/arrow/flight/flight_test.cc
@@ -204,7 +204,7 @@ ARROW_FLIGHT_TEST_ASYNC_CLIENT(GrpcAsyncClientTest);
 
 TEST(TestFlight, ConnectUri) {
   TestServer server("flight-test-server");
-  server.Start();
+  ASSERT_OK(server.Start());
   ASSERT_TRUE(server.IsRunning());
 
   std::stringstream ss;
@@ -230,7 +230,7 @@ TEST(TestFlight, InvalidUriScheme) {
 #ifndef _WIN32
 TEST(TestFlight, ConnectUriUnix) {
   TestServer server("flight-test-server", "/tmp/flight-test.sock");
-  server.Start();
+  ASSERT_OK(server.Start());
   ASSERT_TRUE(server.IsRunning());
 
   std::stringstream ss;
diff --git a/cpp/src/arrow/flight/test_util.cc b/cpp/src/arrow/flight/test_util.cc
index 127827ff38cdd..aa10d9a7da822 100644
--- a/cpp/src/arrow/flight/test_util.cc
+++ b/cpp/src/arrow/flight/test_util.cc
@@ -17,11 +17,6 @@
 
 #include "arrow/flight/test_util.h"
 
-#ifdef __APPLE__
-#include <limits.h>
-#include <mach-o/dyld.h>
-#endif
-
 #include <algorithm>
 #include <cstdlib>
 #include <fstream>
@@ -31,18 +26,13 @@
 #include "arrow/util/windows_compatibility.h"
 
 #include <gtest/gtest.h>
-#include <boost/filesystem.hpp>
-#define BOOST_NO_CXX98_FUNCTION_BASE  // ARROW-17805
-// We need BOOST_USE_WINDOWS_H definition with MinGW when we use
-// boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in
-// cpp/cmake_modules/ThirdpartyToolchain.cmake for details.
-#include <boost/process.hpp>
 
 #include "arrow/array.h"
 #include "arrow/array/builder_primitive.h"
 #include "arrow/ipc/test_common.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/process.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/logging.h"
 
@@ -51,101 +41,27 @@
 
 namespace arrow::flight {
 
-namespace bp = boost::process;
-namespace fs = boost::filesystem;
-
-namespace {
-
-Status ResolveCurrentExecutable(fs::path* out) {
-  // See https://stackoverflow.com/a/1024937/10194 for various
-  // platform-specific recipes.
-
-  boost::system::error_code ec;
-
-#if defined(__linux__)
-  *out = fs::canonical("/proc/self/exe", ec);
-#elif defined(__APPLE__)
-  char buf[PATH_MAX + 1];
-  uint32_t bufsize = sizeof(buf);
-  if (_NSGetExecutablePath(buf, &bufsize) < 0) {
-    return Status::Invalid("Can't resolve current exe: path too large");
-  }
-  *out = fs::canonical(buf, ec);
-#elif defined(_WIN32)
-  char buf[MAX_PATH + 1];
-  if (!GetModuleFileNameA(NULL, buf, sizeof(buf))) {
-    return Status::Invalid("Can't get executable file path");
-  }
-  *out = fs::canonical(buf, ec);
-#else
-  ARROW_UNUSED(ec);
-  return Status::NotImplemented("Not available on this system");
-#endif
-  if (ec) {
-    // XXX fold this into the Status class?
-    return Status::IOError("Can't resolve current exe: ", ec.message());
+Status TestServer::Start(const std::vector<std::string>& extra_args) {
+  server_process_ = std::make_unique<util::Process>();
+  ARROW_RETURN_NOT_OK(server_process_->SetExecutable(executable_name_));
+  std::vector<std::string> args = {};
+  if (unix_sock_.empty()) {
+    args.push_back("-port");
+    args.push_back(std::to_string(port_));
   } else {
-    return Status::OK();
-  }
-}
-
-}  // namespace
-
-void TestServer::Start(const std::vector<std::string>& extra_args) {
-  namespace fs = boost::filesystem;
-
-  std::string str_port = std::to_string(port_);
-  std::vector<fs::path> search_path = ::boost::this_process::path();
-  // If possible, prepend current executable directory to search path,
-  // since it's likely that the test server executable is located in
-  // the same directory as the running unit test.
-  fs::path current_exe;
-  Status st = ResolveCurrentExecutable(&current_exe);
-  if (st.ok()) {
-    search_path.insert(search_path.begin(), current_exe.parent_path());
-  } else if (st.IsNotImplemented()) {
-    ARROW_CHECK(st.IsNotImplemented()) << st.ToString();
-  }
-
-  try {
-    if (unix_sock_.empty()) {
-      server_process_ =
-          std::make_shared<bp::child>(bp::search_path(executable_name_, search_path),
-                                      "-port", str_port, bp::args(extra_args));
-    } else {
-      server_process_ =
-          std::make_shared<bp::child>(bp::search_path(executable_name_, search_path),
-                                      "-server_unix", unix_sock_, bp::args(extra_args));
-    }
-  } catch (...) {
-    std::stringstream ss;
-    ss << "Failed to launch test server '" << executable_name_ << "', looked in ";
-    for (const auto& path : search_path) {
-      ss << path << " : ";
-    }
-    ARROW_LOG(FATAL) << ss.str();
-    throw;
+    args.push_back("-server_unix");
+    args.push_back(unix_sock_);
   }
-  std::cout << "Server running with pid " << server_process_->id() << std::endl;
+  args.insert(args.end(), extra_args.begin(), extra_args.end());
+  server_process_->SetArgs(args);
+  ARROW_RETURN_NOT_OK(server_process_->Execute());
+  std::cout << "Server running with pid " << server_process_->pid() << std::endl;
+  return Status::OK();
 }
 
-int TestServer::Stop() {
-  if (server_process_ && server_process_->valid()) {
-#ifndef _WIN32
-    kill(server_process_->id(), SIGTERM);
-#else
-    // This would use SIGKILL on POSIX, which is more brutal than SIGTERM
-    server_process_->terminate();
-#endif
-    server_process_->wait();
-    return server_process_->exit_code();
-  } else {
-    // Presumably the server wasn't able to start
-    return -1;
-  }
-}
+void TestServer::Stop() { server_process_ = nullptr; }
 
-bool TestServer::IsRunning() { return server_process_->running(); }
+bool TestServer::IsRunning() { return server_process_ && server_process_->IsRunning(); }
 
 int TestServer::port() const { return port_; }
 
diff --git a/cpp/src/arrow/flight/test_util.h b/cpp/src/arrow/flight/test_util.h
index 15ba6145ecd2b..946caebcc2b5a 100644
--- a/cpp/src/arrow/flight/test_util.h
+++ b/cpp/src/arrow/flight/test_util.h
@@ -29,6 +29,7 @@
 
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/process.h"
 #include "arrow/testing/util.h"
 
 #include "arrow/flight/client.h"
@@ -36,14 +37,6 @@
 #include "arrow/flight/types.h"
 #include "arrow/flight/visibility.h"
 
-namespace boost {
-namespace process {
-
-class child;
-
-}  // namespace process
-}  // namespace boost
-
 namespace arrow {
 namespace flight {
 
@@ -76,10 +69,10 @@ class ARROW_FLIGHT_EXPORT TestServer {
   TestServer(const std::string& executable_name, const std::string& unix_sock)
       : executable_name_(executable_name), unix_sock_(unix_sock) {}
 
-  void Start(const std::vector<std::string>& extra_args);
-  void Start() { Start({}); }
+  Status Start(const std::vector<std::string>& extra_args);
+  Status Start() { return Start({}); }
 
-  int Stop();
+  void Stop();
 
   bool IsRunning();
 
@@ -90,7 +83,7 @@ class ARROW_FLIGHT_EXPORT TestServer {
   std::string executable_name_;
   int port_;
   std::string unix_sock_;
-  std::shared_ptr<::boost::process::child> server_process_;
+  std::unique_ptr<util::Process> server_process_;
 };
 
 // Helper to initialize a server and matching client with callbacks to
diff --git a/cpp/src/arrow/testing/process.cc b/cpp/src/arrow/testing/process.cc
new file mode 100644
index 0000000000000..32da81f14630e
--- /dev/null
+++ b/cpp/src/arrow/testing/process.cc
@@ -0,0 +1,298 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/testing/process.h"
+#include "arrow/result.h"
+
+// This boost/asio/io_context.hpp include is unnecessary for non-MinGW
+// builds.
+//
+// It ensures that boost/asio/detail/socket_types.hpp is included before
+// any "#include <windows.h>": boost/asio/detail/socket_types.hpp doesn't
+// work if windows.h is already included.
+#include <boost/asio/io_context.hpp>
+
+#ifdef BOOST_PROCESS_HAVE_V2
+// We can't use v2 API on Windows because v2 API doesn't support
+// process group [1] and GCS testbench uses multiple processes [2].
+//
+// [1] https://github.com/boostorg/process/issues/259
+// [2] https://github.com/googleapis/storage-testbench/issues/669
+#ifndef _WIN32
+#define BOOST_PROCESS_USE_V2
+#endif
+#endif
+
+#ifdef BOOST_PROCESS_USE_V2
+#ifdef BOOST_PROCESS_NEED_SOURCE
+// Workaround for https://github.com/boostorg/process/issues/312
+#define BOOST_PROCESS_V2_SEPARATE_COMPILATION
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#endif
+#include <boost/process/v2.hpp>
+#include <boost/process/v2/src.hpp>
+#else
+#include <boost/process/v2.hpp>
+#endif
+#include <unordered_map>
+#else
+// We need BOOST_USE_WINDOWS_H definition with MinGW when we use
+// boost/process.hpp. boost/process/detail/windows/handle_workaround.hpp
+// doesn't work without BOOST_USE_WINDOWS_H with MinGW because MinGW
+// doesn't provide __kernel_entry without winternl.h.
+//
+// See also:
+// https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp
+#ifdef __MINGW32__
+#define BOOST_USE_WINDOWS_H 1
+#endif
+#ifdef BOOST_PROCESS_HAVE_V1
+#include <boost/process/v1.hpp>
+#else
+#include <boost/process.hpp>
+#endif
+#endif
+
+#ifdef __APPLE__
+#include <limits.h>
+#include <mach-o/dyld.h>
+#endif
+
+#include <chrono>
+#include <iostream>
+#include <sstream>
+#include <thread>
+
+#ifdef BOOST_PROCESS_USE_V2
+namespace asio = BOOST_PROCESS_V2_ASIO_NAMESPACE;
+namespace process = BOOST_PROCESS_V2_NAMESPACE;
+namespace filesystem = process::filesystem;
+#elif defined(BOOST_PROCESS_HAVE_V1)
+namespace process = boost::process::v1;
+namespace filesystem = boost::process::v1::filesystem;
+#else
+namespace process = boost::process;
+namespace filesystem = boost::filesystem;
+#endif
+
+namespace arrow::util {
+
+class Process::Impl {
+ public:
+  Impl() {
+    // Get a copy of the current environment.
+#ifdef BOOST_PROCESS_USE_V2
+    for (const auto& kv : process::environment::current()) {
+      env_[kv.key()] = process::environment::value(kv.value());
+    }
+#else
+    env_ = process::environment(boost::this_process::environment());
+#endif
+  }
+
+  ~Impl() {
+#ifdef BOOST_PROCESS_USE_V2
+    // V2 doesn't provide process group support yet:
+    // https://github.com/boostorg/process/issues/259
+    //
+    // So we try graceful shutdown (SIGTERM + waitpid()) before
+    // immediate shutdown (SIGKILL). This assumes that the target
+    // executable such as "python3 -m testbench" terminates all related
+    // processes by graceful shutdown.
+    boost::system::error_code error_code;
+    if (process_ && process_->running(error_code)) {
+      process_->request_exit(error_code);
+      if (!error_code) {
+        auto timeout = std::chrono::seconds(3);
+        std::chrono::time_point<std::chrono::steady_clock> end =
+            std::chrono::steady_clock::now() + timeout;
+        while (process_->running(error_code) && std::chrono::steady_clock::now() < end) {
+          std::this_thread::sleep_for(std::chrono::milliseconds(20));
+        }
+      }
+    }
+#else
+    process_group_ = nullptr;
+#endif
+    process_ = nullptr;
+  }
+
+  Status SetExecutable(const std::string& name) {
+#ifdef BOOST_PROCESS_USE_V2
+    executable_ = process::environment::find_executable(name);
+#else
+    executable_ = process::search_path(name);
+#endif
+    if (executable_.empty()) {
+      // Search the current executable directory as fallback.
+      ARROW_ASSIGN_OR_RAISE(auto current_exe, ResolveCurrentExecutable());
+#ifdef BOOST_PROCESS_USE_V2
+      std::unordered_map<process::environment::key, process::environment::value> env;
+      for (const auto& kv : process::environment::current()) {
+        env[kv.key()] = process::environment::value(kv.value());
+      }
+      env["PATH"] = process::environment::value(current_exe.parent_path());
+      executable_ = process::environment::find_executable(name, env);
+#else
+      executable_ = process::search_path(name, {current_exe.parent_path()});
+#endif
+    }
+    if (executable_.empty()) {
+      return Status::IOError("Failed to find '", name, "' in PATH");
+    }
+    return Status::OK();
+  }
+
+  void SetArgs(const std::vector<std::string>& args) { args_ = args; }
+
+  void SetEnv(const std::string& name, const std::string& value) {
+#ifdef BOOST_PROCESS_USE_V2
+    env_[name] = process::environment::value(value);
+#else
+    env_[name] = value;
+#endif
+  }
+
+  void IgnoreStderr() { keep_stderr_ = false; }
+
+  Status Execute() {
+    try {
+#ifdef BOOST_PROCESS_USE_V2
+      return ExecuteV2();
+#else
+      return ExecuteV1();
+#endif
+    } catch (const std::exception& e) {
+      return Status::IOError("Failed to launch '", executable_, "': ", e.what());
+    }
+  }
+
+  bool IsRunning() {
+#ifdef BOOST_PROCESS_USE_V2
+    boost::system::error_code error_code;
+    return process_ && process_->running(error_code);
+#else
+    return process_ && process_->running();
+#endif
+  }
+
+  uint64_t pid() {
+    if (!process_) {
+      return 0;
+    }
+    return process_->id();
+  }
+
+ private:
+  filesystem::path executable_;
+  std::vector<std::string> args_;
+  bool keep_stderr_ = true;
+#ifdef BOOST_PROCESS_USE_V2
+  std::unordered_map<process::environment::key, process::environment::value> env_;
+  std::unique_ptr<process::process> process_;
+  asio::io_context ctx_;
+  // boost/process/v2/ doesn't support process group yet:
+  // https://github.com/boostorg/process/issues/259
+#else
+  process::environment env_;
+  std::unique_ptr<process::child> process_;
+  std::unique_ptr<process::group> process_group_;
+#endif
+
+  Result<filesystem::path> ResolveCurrentExecutable() {
+    // See https://stackoverflow.com/a/1024937/10194 for various
+    // platform-specific recipes.
+
+    filesystem::path path;
+    boost::system::error_code error_code;
+
+#if defined(__linux__)
+    path = filesystem::canonical("/proc/self/exe", error_code);
+#elif defined(__APPLE__)
+    char buf[PATH_MAX + 1];
+    uint32_t bufsize = sizeof(buf);
+    if (_NSGetExecutablePath(buf, &bufsize) < 0) {
+      return Status::Invalid("Can't resolve current exe: path too large");
+    }
+    path = filesystem::canonical(buf, error_code);
+#elif defined(_WIN32)
+    char buf[MAX_PATH + 1];
+    if (!GetModuleFileNameA(NULL, buf, sizeof(buf))) {
+      return Status::Invalid("Can't get executable file path");
+    }
+    path = filesystem::canonical(buf, error_code);
+#else
+    ARROW_UNUSED(error_code);
+    return Status::NotImplemented("Not available on this system");
+#endif
+    if (error_code) {
+      // XXX fold this into the Status class?
+      return Status::IOError("Can't resolve current exe: ", error_code.message());
+    } else {
+      return path;
+    }
+  }
+
+#ifdef BOOST_PROCESS_USE_V2
+  Status ExecuteV2() {
+    process::process_environment env(env_);
+    // We can't use std::make_unique<process::process>.
+    process_ = std::unique_ptr<process::process>(
+        new process::process(ctx_, executable_, args_, env,
+                             keep_stderr_ ? process::process_stdio{{}, {}, {}}
+                                          : process::process_stdio{{}, {}, nullptr}));
+    return Status::OK();
+  }
+#else
+  Status ExecuteV1() {
+    process_group_ = std::make_unique<process::group>();
+    if (keep_stderr_) {
+      process_ = std::make_unique<process::child>(executable_, process::args(args_), env_,
+                                                  *process_group_);
+    } else {
+      process_ = std::make_unique<process::child>(executable_, process::args(args_), env_,
+                                                  *process_group_,
+                                                  process::std_err > process::null);
+    }
+    return Status::OK();
+  }
+#endif
+};
+
+Process::Process() : impl_(new Impl()) {}
+
+Process::~Process() {}
+
+Status Process::SetExecutable(const std::string& path) {
+  return impl_->SetExecutable(path);
+}
+
+void Process::SetArgs(const std::vector<std::string>& args) { impl_->SetArgs(args); }
+
+void Process::SetEnv(const std::string& key, const std::string& value) {
+  impl_->SetEnv(key, value);
+}
+
+void Process::IgnoreStderr() { impl_->IgnoreStderr(); }
+
+Status Process::Execute() { return impl_->Execute(); }
+
+bool Process::IsRunning() { return impl_->IsRunning(); }
+
+uint64_t Process::pid() { return impl_->pid(); }
+}  // namespace arrow::util
diff --git a/cpp/src/arrow/testing/process.h b/cpp/src/arrow/testing/process.h
new file mode 100644
index 0000000000000..d4d2ae124f427
--- /dev/null
+++ b/cpp/src/arrow/testing/process.h
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/testing/visibility.h"
+
+namespace arrow::util {
+
+/// \brief Helper to launch and supervise a child process in tests.
+///
+/// Configure the process with SetExecutable(), SetArgs(), SetEnv() and
+/// IgnoreStderr(), then launch it with Execute(). The child starts from a
+/// copy of the current environment.
+class ARROW_TESTING_EXPORT Process {
+ public:
+  Process();
+  /// \brief Stop the child process, if it is still running, and release it.
+  ~Process();
+
+  /// \brief Locate the executable to launch.
+  ///
+  /// The name is searched in PATH first, then in the directory that
+  /// contains the current executable.
+  ///
+  /// \param[in] path the executable name to search for
+  /// \return an error if the executable can't be found
+  Status SetExecutable(const std::string& path);
+  /// \brief Set the command line arguments passed to the executable.
+  void SetArgs(const std::vector<std::string>& args);
+  /// \brief Set an environment variable for the child process.
+  void SetEnv(const std::string& name, const std::string& value);
+  /// \brief Discard the child's stderr output.
+  void IgnoreStderr();
+  /// \brief Launch the configured executable.
+  /// \return IOError if the process can't be launched
+  Status Execute();
+  /// \brief Whether the process has been launched and is still running.
+  bool IsRunning();
+  /// \brief The ID of the launched process, or 0 if not launched.
+  uint64_t pid();
+
+ private:
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+}  // namespace arrow::util
diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt
index c092ff4fd011f..c2bc7fc02797e 100644
--- a/cpp/src/gandiva/precompiled/CMakeLists.txt
+++ b/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -53,8 +53,8 @@ add_custom_target(precompiled ALL DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH}
                                           ${GANDIVA_PRECOMPILED_CC_PATH})
 
 # testing
-if(ARROW_BUILD_TESTS)
-  add_executable(gandiva-precompiled-test
+add_gandiva_test(precompiled-test
+                 SOURCES
                  ../context_helper.cc
                  bitmap_test.cc
                  bitmap.cc
@@ -75,16 +75,12 @@ if(ARROW_BUILD_TESTS)
                  decimal_ops_test.cc
                  decimal_ops.cc
                  ../decimal_type_util.cc
-                 ../decimal_xlarge.cc)
-  target_include_directories(gandiva-precompiled-test PRIVATE ${CMAKE_SOURCE_DIR}/src)
-  target_link_libraries(gandiva-precompiled-test PRIVATE ${ARROW_TEST_LINK_LIBS}
-                                                         Boost::headers)
-  target_compile_definitions(gandiva-precompiled-test PRIVATE GANDIVA_UNIT_TEST=1
-                                                              ARROW_STATIC GANDIVA_STATIC)
-  set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/gandiva-precompiled-test")
-  add_test(gandiva-precompiled-test ${TEST_PATH})
-  set_property(TEST gandiva-precompiled-test
-               APPEND
-               PROPERTY LABELS "unittest;gandiva-tests")
-  add_dependencies(gandiva-tests gandiva-precompiled-test)
-endif()
+                 ../decimal_xlarge.cc
+                 EXTRA_INCLUDES
+                 ${CMAKE_SOURCE_DIR}/src
+                 EXTRA_LINK_LIBS
+                 Boost::headers
+                 DEFINITIONS
+                 GANDIVA_UNIT_TEST=1
+                 ARROW_STATIC
+                 GANDIVA_STATIC)
diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json
index 6f825b55cfd94..103e678ebb4ac 100644
--- a/cpp/vcpkg.json
+++ b/cpp/vcpkg.json
@@ -15,11 +15,11 @@
       ]
     },
     "benchmark",
+    "boost-crc",
     "boost-filesystem",
     "boost-multiprecision",
     "boost-process",
     "boost-system",
-    "boost-crc",
     "brotli",
     "bzip2",
     "c-ares",

From 589ab7aca8179a749eeef091884bebc12700f168 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:13:17 +0900
Subject: [PATCH 56/63] MINOR: [CI] Bump actions/setup-python from 5.1.1 to
 5.2.0 (#43917)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5.1.1 to 5.2.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/actions/setup-python/releases">actions/setup-python's releases</a>.</em></p>
<blockquote>
<h2>v5.2.0</h2>
<h2>What's Changed</h2>
<h3>Bug fixes:</h3>
<ul>
<li>Add <code>.zip</code> extension to Windows package downloads for <code>Expand-Archive</code> Compatibility by <a href="https://github.com/priyagupta108"><code>@​priyagupta108</code></a> in <a href="https://redirect.github.com/actions/setup-python/pull/916">actions/setup-python#916</a>
This addresses compatibility issues on Windows self-hosted runners by ensuring that the filenames for Python and PyPy package downloads explicitly include the .zip extension, allowing the Expand-Archive command to function correctly.</li>
<li>Add arch to cache key by <a href="https://github.com/Zxilly"><code>@​Zxilly</code></a> in <a href="https://redirect.github.com/actions/setup-python/pull/896">actions/setup-python#896</a>
This addresses issues with caching by adding the architecture (arch) to the cache key, ensuring that cache keys are accurate to prevent conflicts</li>
</ul>
<h3>Documentation changes:</h3>
<ul>
<li>Fix display of emojis in contributors doc by <a href="https://github.com/sciencewhiz"><code>@​sciencewhiz</code></a> in <a href="https://redirect.github.com/actions/setup-python/pull/899">actions/setup-python#899</a></li>
<li>Documentation update for caching poetry dependencies by <a href="https://github.com/gowridurgad"><code>@​gowridurgad</code></a> in <a href="https://redirect.github.com/actions/setup-python/pull/908">actions/setup-python#908</a></li>
</ul>
<h3>Dependency updates:</h3>
<ul>
<li>Bump <code>@​iarna/toml</code> version from 2.2.5 to 3.0.0 by <a href="https://github.com/priya-kinthali"><code>@​priya-kinthali</code></a> in <a href="https://redirect.github.com/actions/setup-python/pull/912">actions/setup-python#912</a></li>
<li>Bump pyinstaller from 3.6 to 5.13.1 by <a href="https://github.com/aparnajyothi-y"><code>@​aparnajyothi-y</code></a> in <a href="https://redirect.github.com/actions/setup-python/pull/923">actions/setup-python#923</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/sciencewhiz"><code>@​sciencewhiz</code></a> made their first contribution in <a href="https://redirect.github.com/actions/setup-python/pull/899">actions/setup-python#899</a></li>
<li><a href="https://github.com/priyagupta108"><code>@​priyagupta108</code></a> made their first contribution in <a href="https://redirect.github.com/actions/setup-python/pull/916">actions/setup-python#916</a></li>
<li><a href="https://github.com/Zxilly"><code>@​Zxilly</code></a> made their first contribution in <a href="https://redirect.github.com/actions/setup-python/pull/896">actions/setup-python#896</a></li>
<li><a href="https://github.com/aparnajyothi-y"><code>@​aparnajyothi-y</code></a> made their first contribution in <a href="https://redirect.github.com/actions/setup-python/pull/923">actions/setup-python#923</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/setup-python/compare/v5...v5.2.0">https://github.com/actions/setup-python/compare/v5...v5.2.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/actions/setup-python/commit/f677139bbe7f9c59b41e40162b753c062f5d49a3"><code>f677139</code></a> Bump pyinstaller from 3.6 to 5.13.1 in /<strong>tests</strong>/data (<a href="https://redirect.github.com/actions/setup-python/issues/923">#923</a>)</li>
<li><a href="https://github.com/actions/setup-python/commit/2bd53f9a4d1dd1cd21eaffcc01a7b91a8e73ea4c"><code>2bd53f9</code></a> Documentation update for caching poetry dependencies (<a href="https://redirect.github.com/actions/setup-python/issues/908">#908</a>)</li>
<li><a href="https://github.com/actions/setup-python/commit/80b49d3ed89312896dbdcbefc2ddb159c7f8ca43"><code>80b49d3</code></a> fix: add arch to cache key (<a href="https://redirect.github.com/actions/setup-python/issues/896">#896</a>)</li>
<li><a href="https://github.com/actions/setup-python/commit/036a5236741fd24c89eea80d1b76179e8e5f9214"><code>036a523</code></a> Fix: Add <code>.zip</code> extension to Windows package downloads for <code>Expand-Archive</code> C...</li>
<li><a href="https://github.com/actions/setup-python/commit/04c1311429f7be71707d8ab66c7af8a14e54b938"><code>04c1311</code></a> Fix display of emojis in contributors doc (<a href="https://redirect.github.com/actions/setup-python/issues/899">#899</a>)</li>
<li><a href="https://github.com/actions/setup-python/commit/cb6845644151e35f879e10f2f0896c3c8bee372c"><code>cb68456</code></a> Updated <code>@​iarna/toml</code> version to 3.0.0 (<a href="https://redirect.github.com/actions/setup-python/issues/912">#912</a>)</li>
<li>See full diff in <a href="https://github.com/actions/setup-python/compare/v5.1.1...v5.2.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-python&package-manager=github_actions&previous-version=5.1.1&new-version=5.2.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/archery.yml      | 2 +-
 .github/workflows/comment_bot.yml  | 2 +-
 .github/workflows/cpp.yml          | 4 ++--
 .github/workflows/csharp.yml       | 2 +-
 .github/workflows/dev.yml          | 4 ++--
 .github/workflows/docs.yml         | 2 +-
 .github/workflows/docs_light.yml   | 2 +-
 .github/workflows/go.yml           | 6 +++---
 .github/workflows/integration.yml  | 2 +-
 .github/workflows/java.yml         | 2 +-
 .github/workflows/java_jni.yml     | 4 ++--
 .github/workflows/java_nightly.yml | 2 +-
 .github/workflows/js.yml           | 2 +-
 .github/workflows/pr_bot.yml       | 2 +-
 .github/workflows/python.yml       | 4 ++--
 .github/workflows/r.yml            | 4 ++--
 .github/workflows/r_nightly.yml    | 2 +-
 .github/workflows/ruby.yml         | 2 +-
 18 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index b016f7d11b9fa..2c46071010962 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -58,7 +58,7 @@ jobs:
         shell: bash
         run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true
       - name: Setup Python
-        uses: actions/setup-python@v5.1.1
+        uses: actions/setup-python@v5.2.0
         with:
           python-version: '3.9'
       - name: Install pygit2 binary wheel
diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml
index 1138c0a02f812..b7af4c5800835 100644
--- a/.github/workflows/comment_bot.yml
+++ b/.github/workflows/comment_bot.yml
@@ -41,7 +41,7 @@ jobs:
           # fetch the tags for version number generation
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.12
       - name: Install Archery and Crossbow dependencies
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index d51438c5f193a..20bcfcb38da69 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -243,7 +243,7 @@ jobs:
           $(brew --prefix bash)/bin/bash \
             ci/scripts/install_minio.sh latest ${ARROW_HOME}
       - name: Set up Python
-        uses: actions/setup-python@v5.1.1
+        uses: actions/setup-python@v5.2.0
         with:
           python-version: 3.12
       - name: Install Google Cloud Storage Testbench
@@ -462,7 +462,7 @@ jobs:
             https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
           chmod +x /usr/local/bin/minio.exe
       - name: Set up Python
-        uses: actions/setup-python@v5.1.1
+        uses: actions/setup-python@v5.2.0
         id: python-install
         with:
           python-version: 3.9
diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml
index 6e8548dc960f4..c618350affbeb 100644
--- a/.github/workflows/csharp.yml
+++ b/.github/workflows/csharp.yml
@@ -108,7 +108,7 @@ jobs:
         with:
           dotnet-version: ${{ matrix.dotnet }}
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.12
       - name: Checkout Arrow
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index cc3ff6330746d..1cc8d993498b6 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -45,7 +45,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.12
       - name: Install pre-commit
@@ -104,7 +104,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Install Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: '3.12'
       - name: Install Ruby
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 25db1c39ad89e..1219f7526f9f2 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -52,7 +52,7 @@ jobs:
           key: debian-docs-${{ hashFiles('cpp/**') }}
           restore-keys: debian-docs-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml
index ea7fe5d02d7b8..454affd7fa7f9 100644
--- a/.github/workflows/docs_light.yml
+++ b/.github/workflows/docs_light.yml
@@ -58,7 +58,7 @@ jobs:
           key: conda-docs-${{ hashFiles('cpp/**') }}
           restore-keys: conda-docs-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.12
       - name: Setup Archery
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index b9a19d182d5c4..9b18b010a0cb9 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -207,7 +207,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -247,7 +247,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -339,7 +339,7 @@ jobs:
           github.event_name == 'push' &&
           github.repository == 'apache/arrow' &&
           github.ref_name == 'main'
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: '3.10'
       - name: Run Benchmarks
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 43f8af0a600d8..3a6b568c5207f 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -89,7 +89,7 @@ jobs:
           key: conda-${{ hashFiles('cpp/**') }}
           restore-keys: conda-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 0317879b580ba..8560f0dd1cbe9 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -76,7 +76,7 @@ jobs:
           key: maven-${{ hashFiles('java/**') }}
           restore-keys: maven-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index c2bc679e681a2..f204d6459ae01 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -70,7 +70,7 @@ jobs:
           key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }}
           restore-keys: java-jni-manylinux-2014-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -110,7 +110,7 @@ jobs:
           key: maven-${{ hashFiles('java/**') }}
           restore-keys: maven-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml
index 72afb6dbf1c1d..0bf0c27288faf 100644
--- a/.github/workflows/java_nightly.yml
+++ b/.github/workflows/java_nightly.yml
@@ -58,7 +58,7 @@ jobs:
           repository: ursacomputing/crossbow
           ref: main
       - name: Set up Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           cache: 'pip'
           python-version: 3.12
diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index 630bef61105f6..4ab9831924fb1 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -54,7 +54,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml
index 7dd06b6aeec09..bbb1a2d7228d0 100644
--- a/.github/workflows/pr_bot.yml
+++ b/.github/workflows/pr_bot.yml
@@ -82,7 +82,7 @@ jobs:
           # fetch the tags for version number generation
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.12
       - name: Install Archery and Crossbow dependencies
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 90d3a50af3705..b88ea7ce4f1ee 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -107,7 +107,7 @@ jobs:
           key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }}
           restore-keys: ${{ matrix.cache }}-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -177,7 +177,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@v5.1.1
+        uses: actions/setup-python@v5.2.0
         with:
           python-version: '3.11'
       - name: Install Dependencies
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 2820d42470bca..21afa4586b5a4 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -146,7 +146,7 @@ jobs:
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-
             ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
@@ -206,7 +206,7 @@ jobs:
           fetch-depth: 0
           submodules: recursive
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery
diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml
index 1ec071b6bbb5e..9817e41d3b61d 100644
--- a/.github/workflows/r_nightly.yml
+++ b/.github/workflows/r_nightly.yml
@@ -60,7 +60,7 @@ jobs:
           repository: ursacomputing/crossbow
           ref: main
       - name: Set up Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           cache: 'pip'
           python-version: 3.12
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 4b74b8d7fc84d..228bacb77e58a 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -83,7 +83,7 @@ jobs:
           key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }}
           restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby-
       - name: Setup Python
-        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
         with:
           python-version: 3.8
       - name: Setup Archery

From 4ed5a149695644fe364466eabcae38d8dabfc090 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 3 Sep 2024 10:13:48 +0900
Subject: [PATCH 57/63] GH-43797: [C++] Attach `arrow::ArrayStatistics` to
 `arrow::ArrayData` (#43801)

### Rationale for this change

If we can attach associated statistics to an array via `ArrayData`, we can use it in later processes such as query planning.

If statistics are held by `ArrayData` rather than by `Array`, we can also use them in compute kernels.

There was a concern that associated `arrow::ArrayStatistics` may be outdated if `arrow::ArrayData` is mutated after attaching `arrow::ArrayStatistics`. But `arrow::ArrayData` isn't mutable after the first population. So `arrow::ArrayStatistics` will not be outdated. We can require mutators to take responsibility for statistics.

### What changes are included in this PR?

* Add `arrow::ArrayData::statistics`
* Add `arrow::Array::statistics()` to get statistics attached in `arrow::ArrayData`

This doesn't provide a new `arrow::ArrayData` constructor (`arrow::ArrayData::Make()`) that accepts `arrow::ArrayStatistics`. We can change `arrow::ArrayData::statistics` after we create `arrow::ArrayData`.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

`arrow::Array::statistics()` is a new public API.
* GitHub Issue: #43797

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/array/array_base.h  |   8 ++
 cpp/src/arrow/array/array_test.cc | 126 ++++++++++++++++++++++++++++++
 cpp/src/arrow/array/data.cc       |   3 +
 cpp/src/arrow/array/data.h        |  24 +++++-
 4 files changed, 159 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h
index 716ae0722069e..e4af67d7e5f0b 100644
--- a/cpp/src/arrow/array/array_base.h
+++ b/cpp/src/arrow/array/array_base.h
@@ -232,6 +232,14 @@ class ARROW_EXPORT Array {
   /// \return DeviceAllocationType
   DeviceAllocationType device_type() const { return data_->device_type(); }
 
+  /// \brief Return the statistics of this Array
+  ///
+  /// This just delegates to calling statistics on the underlying ArrayData
+  /// object which backs this Array.
+  ///
+  /// \return std::shared_ptr<ArrayStatistics> (null if no statistics are attached)
+  std::shared_ptr<ArrayStatistics> statistics() const { return data_->statistics; }
+
  protected:
   Array() = default;
   ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 32806d9d2edb3..73e0c692432b6 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -3709,6 +3709,132 @@ TEST(TestSwapEndianArrayData, InvalidLength) {
   }
 }
 
+class TestArrayDataStatistics : public ::testing::Test {
+ public:
+  void SetUp() {
+    valids_ = {1, 0, 1, 1};
+    null_count_ = std::count(valids_.begin(), valids_.end(), 0);
+    null_buffer_ = *internal::BytesToBits(valids_);
+    values_ = {1, 0, 3, -4};
+    min_ = *std::min_element(values_.begin(), values_.end());
+    max_ = *std::max_element(values_.begin(), values_.end());
+    values_buffer_ = Buffer::FromVector(values_);
+    data_ = ArrayData::Make(int32(), values_.size(), {null_buffer_, values_buffer_},
+                            null_count_);
+    data_->statistics = std::make_shared<ArrayStatistics>();
+    data_->statistics->null_count = null_count_;
+    data_->statistics->min = min_;
+    data_->statistics->is_min_exact = true;
+    data_->statistics->max = max_;
+    data_->statistics->is_max_exact = true;
+  }
+
+ protected:
+  std::vector<uint8_t> valids_;
+  size_t null_count_;
+  std::shared_ptr<Buffer> null_buffer_;
+  std::vector<int32_t> values_;
+  int64_t min_;
+  int64_t max_;
+  std::shared_ptr<Buffer> values_buffer_;
+  std::shared_ptr<ArrayData> data_;
+};
+
+TEST_F(TestArrayDataStatistics, MoveConstructor) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data(std::move(copied_data));
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyConstructor) {
+  ArrayData copied_data(*data_);
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, MoveAssignment) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data;
+  moved_data = std::move(copied_data);
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyAssignment) {
+  ArrayData copied_data;
+  copied_data = *data_;
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyTo) {
+  ASSERT_OK_AND_ASSIGN(auto copied_data,
+                       data_->CopyTo(arrow::default_cpu_memory_manager()));
+
+  ASSERT_TRUE(copied_data->statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data->statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data->statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_TRUE(copied_data->statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data->statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_TRUE(copied_data->statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, Slice) {
+  auto sliced_data = data_->Slice(0, 1);
+  ASSERT_FALSE(sliced_data->statistics);
+}
+
 template <typename PType>
 class TestPrimitiveArray : public ::testing::Test {
  public:
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index 83eeb56c496cf..8e29297a8c175 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -165,6 +165,8 @@ Result<std::shared_ptr<ArrayData>> CopyToImpl(const ArrayData& data,
     ARROW_ASSIGN_OR_RAISE(output->dictionary, CopyToImpl(*data.dictionary, to, copy_fn));
   }
 
+  output->statistics = data.statistics;
+
   return output;
 }
 }  // namespace
@@ -195,6 +197,7 @@ std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
   } else {
     copy->null_count = null_count != 0 ? kUnknownNullCount : 0;
   }
+  copy->statistics = nullptr;
   return copy;
 }
 
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index e0508fe6980a7..1e6ee9a1d32ff 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -24,6 +24,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/array/statistics.h"
 #include "arrow/buffer.h"
 #include "arrow/result.h"
 #include "arrow/type.h"
@@ -152,7 +153,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(std::move(other.buffers)),
         child_data(std::move(other.child_data)),
-        dictionary(std::move(other.dictionary)) {
+        dictionary(std::move(other.dictionary)),
+        statistics(std::move(other.statistics)) {
     SetNullCount(other.null_count);
   }
 
@@ -163,7 +165,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(other.buffers),
         child_data(other.child_data),
-        dictionary(other.dictionary) {
+        dictionary(other.dictionary),
+        statistics(other.statistics) {
     SetNullCount(other.null_count);
   }
 
@@ -176,6 +179,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = std::move(other.buffers);
     child_data = std::move(other.child_data);
     dictionary = std::move(other.dictionary);
+    statistics = std::move(other.statistics);
     return *this;
   }
 
@@ -188,6 +192,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = other.buffers;
     child_data = other.child_data;
     dictionary = other.dictionary;
+    statistics = other.statistics;
     return *this;
   }
 
@@ -274,6 +279,18 @@ struct ARROW_EXPORT ArrayData {
   }
 
   /// \brief Construct a zero-copy slice of the data with the given offset and length
+  ///
+  /// The associated `ArrayStatistics` is always discarded in a sliced
+  /// `ArrayData` because the `ArrayStatistics` of the original
+  /// `ArrayData` may be invalid for a sliced `ArrayData`. If you want
+  /// to reuse statistics in the original `ArrayData`, you need to do
+  /// it by yourself.
+  ///
+  /// If the specified slice range is the same as the range of the
+  /// original `ArrayData`, the statistics could be reused because the
+  /// slice has the same data as the original `ArrayData`. However, the
+  /// associated `ArrayStatistics` is discarded in this case too. Use
+  /// `Copy()` instead in that case.
   std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
 
   /// \brief Input-checking variant of Slice
@@ -390,6 +407,9 @@ struct ARROW_EXPORT ArrayData {
 
   // The dictionary for this Array, if any. Only used for dictionary type
   std::shared_ptr<ArrayData> dictionary;
+
+  // The statistics for this Array.
+  std::shared_ptr<ArrayStatistics> statistics;
 };
 
 /// \brief A non-owning Buffer reference

From 1475bd815bbdcd2bbcc6d6e74a7d8df5fe369ea5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 3 Sep 2024 14:58:59 +0900
Subject: [PATCH 58/63] MINOR: [Java] Bump org.mockito:mockito-junit-jupiter
 from 5.12.0 to 5.13.0 in /java (#43919)

Bumps [org.mockito:mockito-junit-jupiter](https://github.com/mockito/mockito) from 5.12.0 to 5.13.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/mockito/mockito/releases">org.mockito:mockito-junit-jupiter's releases</a>.</em></p>
<blockquote>
<h2>v5.13.0</h2>
<p><em>Changelog generated by <a href="https://github.com/shipkit/shipkit-changelog">Shipkit Changelog Gradle Plugin</a></em></p>
<h4>5.13.0</h4>
<ul>
<li>2024-08-27 - <a href="https://github.com/mockito/mockito/compare/v5.12.0...v5.13.0">43 commit(s)</a> by Breno A, Caleb Cushing, Jinwoo, Kurt Alfred Kluever, Stefano Cordio, Thach Le, dependabot[bot]</li>
<li>Bump versions.bytebuddy from 1.14.19 to 1.15.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3429">#3429</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3429">mockito/mockito#3429</a>)</li>
<li>Bump org.jetbrains.kotlin:kotlin-stdlib from 2.0.10 to 2.0.20 [(<a href="https://redirect.github.com/mockito/mockito/issues/3427">#3427</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3427">mockito/mockito#3427</a>)</li>
<li>Bump org.junit.platform:junit-platform-launcher from 1.10.3 to 1.11.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3425">#3425</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3425">mockito/mockito#3425</a>)</li>
<li>Bump com.gradle.enterprise from 3.17.6 to 3.18 [(<a href="https://redirect.github.com/mockito/mockito/issues/3423">#3423</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3423">mockito/mockito#3423</a>)</li>
<li>Fix a typo in InjectMocks [(<a href="https://redirect.github.com/mockito/mockito/issues/3422">#3422</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3422">mockito/mockito#3422</a>)</li>
<li>Bump versions.bytebuddy from 1.14.18 to 1.14.19 [(<a href="https://redirect.github.com/mockito/mockito/issues/3417">#3417</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3417">mockito/mockito#3417</a>)</li>
<li>Bump androidx.test:runner from 1.6.1 to 1.6.2 [(<a href="https://redirect.github.com/mockito/mockito/issues/3415">#3415</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3415">mockito/mockito#3415</a>)</li>
<li>Bump versions.junitJupiter from 5.10.3 to 5.11.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3413">#3413</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3413">mockito/mockito#3413</a>)</li>
<li>Bump org.jetbrains.kotlin:kotlin-stdlib from 2.0.0 to 2.0.10 [(<a href="https://redirect.github.com/mockito/mockito/issues/3409">#3409</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3409">mockito/mockito#3409</a>)</li>
<li>Bump org.hamcrest:hamcrest-core from 2.2 to 3.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3408">#3408</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3408">mockito/mockito#3408</a>)</li>
<li>Bump com.google.googlejavaformat:google-java-format from 1.22.0 to 1.23.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3407">#3407</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3407">mockito/mockito#3407</a>)</li>
<li>Bump org.shipkit:shipkit-auto-version from 2.0.9 to 2.0.10 [(<a href="https://redirect.github.com/mockito/mockito/issues/3405">#3405</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3405">mockito/mockito#3405</a>)</li>
<li>Bump com.gradle.enterprise from 3.17.5 to 3.17.6 [(<a href="https://redirect.github.com/mockito/mockito/issues/3404">#3404</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3404">mockito/mockito#3404</a>)</li>
<li>Bump gradle/wrapper-validation-action from 3.4.2 to 3.5.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3401">#3401</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3401">mockito/mockito#3401</a>)</li>
<li>Bump org.assertj:assertj-core from 3.26.0 to 3.26.3 [(<a href="https://redirect.github.com/mockito/mockito/issues/3398">#3398</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3398">mockito/mockito#3398</a>)</li>
<li>Bump versions.bytebuddy from 1.14.17 to 1.14.18 [(<a href="https://redirect.github.com/mockito/mockito/issues/3397">#3397</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3397">mockito/mockito#3397</a>)</li>
<li>ci: add .m2 dependencies cache [(<a href="https://redirect.github.com/mockito/mockito/issues/3396">#3396</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3396">mockito/mockito#3396</a>)</li>
<li>Bump org.codehaus.groovy:groovy from 3.0.21 to 3.0.22 [(<a href="https://redirect.github.com/mockito/mockito/issues/3394">#3394</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3394">mockito/mockito#3394</a>)</li>
<li>Bump androidx.test:runner from 1.6.0 to 1.6.1 [(<a href="https://redirect.github.com/mockito/mockito/issues/3393">#3393</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3393">mockito/mockito#3393</a>)</li>
<li>Bump org.junit.platform:junit-platform-launcher from 1.10.2 to 1.10.3 [(<a href="https://redirect.github.com/mockito/mockito/issues/3392">#3392</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3392">mockito/mockito#3392</a>)</li>
<li>Gradle lazy configuration [(<a href="https://redirect.github.com/mockito/mockito/issues/3391">#3391</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3391">mockito/mockito#3391</a>)</li>
<li>Bump androidx.test.ext:junit from 1.2.0 to 1.2.1 [(<a href="https://redirect.github.com/mockito/mockito/issues/3388">#3388</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3388">mockito/mockito#3388</a>)</li>
<li>docs: cleanup javadoc for modularity [(<a href="https://redirect.github.com/mockito/mockito/issues/3386">#3386</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3386">mockito/mockito#3386</a>)</li>
<li>Bump versions.junitJupiter from 5.10.2 to 5.10.3 [(<a href="https://redirect.github.com/mockito/mockito/issues/3385">#3385</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3385">mockito/mockito#3385</a>)</li>
<li>Bump androidx.test.ext:junit from 1.1.5 to 1.2.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3383">#3383</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3383">mockito/mockito#3383</a>)</li>
<li>Bump androidx.test:runner from 1.5.2 to 1.6.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3382">#3382</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3382">mockito/mockito#3382</a>)</li>
<li>Bump net.ltgt.gradle:gradle-errorprone-plugin from 4.0.0 to 4.0.1 [(<a href="https://redirect.github.com/mockito/mockito/issues/3380">#3380</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3380">mockito/mockito#3380</a>)</li>
<li>Bump gradle/wrapper-validation-action from 3.4.1 to 3.4.2 [(<a href="https://redirect.github.com/mockito/mockito/issues/3376">#3376</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3376">mockito/mockito#3376</a>)</li>
<li>Bump gradle/wrapper-validation-action from 3.4.0 to 3.4.1 [(<a href="https://redirect.github.com/mockito/mockito/issues/3372">#3372</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3372">mockito/mockito#3372</a>)</li>
<li>Bump gradle/wrapper-validation-action from 3.3.2 to 3.4.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3365">#3365</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3365">mockito/mockito#3365</a>)</li>
<li>Bump org.shipkit:shipkit-auto-version from 2.0.7 to 2.0.9 [(<a href="https://redirect.github.com/mockito/mockito/issues/3364">#3364</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3364">mockito/mockito#3364</a>)</li>
<li>Bump com.gradle.enterprise from 3.17.4 to 3.17.5 [(<a href="https://redirect.github.com/mockito/mockito/issues/3363">#3363</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3363">mockito/mockito#3363</a>)</li>
<li>Bump org.eclipse.platform:org.eclipse.osgi from 3.19.0 to 3.20.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3362">#3362</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3362">mockito/mockito#3362</a>)</li>
<li>Bump net.ltgt.gradle:gradle-errorprone-plugin from 3.1.0 to 4.0.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3361">#3361</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3361">mockito/mockito#3361</a>)</li>
<li>Bump versions.bytebuddy from 1.14.16 to 1.14.17 [(<a href="https://redirect.github.com/mockito/mockito/issues/3357">#3357</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3357">mockito/mockito#3357</a>)</li>
<li>Bump org.assertj:assertj-core from 3.25.3 to 3.26.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3355">#3355</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3355">mockito/mockito#3355</a>)</li>
<li>EditorConfig enhancement [(<a href="https://redirect.github.com/mockito/mockito/issues/3353">#3353</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3353">mockito/mockito#3353</a>)</li>
<li>Bump versions.bytebuddy from 1.14.15 to 1.14.16 [(<a href="https://redirect.github.com/mockito/mockito/issues/3352">#3352</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3352">mockito/mockito#3352</a>)</li>
<li>Bump org.jetbrains.kotlin:kotlin-stdlib from 1.9.24 to 2.0.0 [(<a href="https://redirect.github.com/mockito/mockito/issues/3351">#3351</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3351">mockito/mockito#3351</a>)</li>
<li>Fixes <a href="https://redirect.github.com/mockito/mockito/issues/3237">#3237</a>: Fix NullPointerException in Only.verify [(<a href="https://redirect.github.com/mockito/mockito/issues/3349">#3349</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3349">mockito/mockito#3349</a>)</li>
<li>Bump com.gradle.enterprise from 3.17.3 to 3.17.4 [(<a href="https://redirect.github.com/mockito/mockito/issues/3348">#3348</a>)](<a href="https://redirect.github.com/mockito/mockito/pull/3348">mockito/mockito#3348</a>)</li>
<li>potential editorconfig enhancement [(<a href="https://redirect.github.com/mockito/mockito/issues/3347">#3347</a>)](<a href="https://redirect.github.com/mockito/mockito/issues/3347">mockito/mockito#3347</a>)</li>
<li>Method <code>Only.verify</code> throws <code>NullPointerException</code> [(<a href="https://redirect.github.com/mockito/mockito/issues/3237">#3237</a>)](<a href="https://redirect.github.com/mockito/mockito/issues/3237">mockito/mockito#3237</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/mockito/mockito/commit/9a7e7ea04294fd53e13936c18aca37640ca4dc5e"><code>9a7e7ea</code></a> Replace <code>dependencies.gradle</code> with <code>libs.versions.toml</code></li>
<li><a href="https://github.com/mockito/mockito/commit/1b7675c045b47637e300230624b29aad80bf64d4"><code>1b7675c</code></a> Allow links to JUnit Jupiter Javadoc</li>
<li><a href="https://github.com/mockito/mockito/commit/f6450a01f27d0e4e7e95a52025b000726558774d"><code>f6450a0</code></a> Bump versions.bytebuddy from 1.14.19 to 1.15.0 (<a href="https://redirect.github.com/mockito/mockito/issues/3429">#3429</a>)</li>
<li><a href="https://github.com/mockito/mockito/commit/77c31972b9bb8100765359081faed88aa35e6e08"><code>77c3197</code></a> Bump org.jetbrains.kotlin:kotlin-stdlib from 2.0.10 to 2.0.20 (<a href="https://redirect.github.com/mockito/mockito/issues/3427">#3427</a>)</li>
<li><a href="https://github.com/mockito/mockito/commit/0e5962428b64eadcd5ddcc89848fa6f1345454ec"><code>0e59624</code></a> Bump org.junit.platform:junit-platform-launcher from 1.10.3 to 1.11.0 (<a href="https://redirect.github.com/mockito/mockito/issues/3425">#3425</a>)</li>
<li><a href="https://github.com/mockito/mockito/commit/40925b6f93f81c598112636533fc469e85476edb"><code>40925b6</code></a> Bump com.gradle.enterprise from 3.17.6 to 3.18 (<a href="https://redirect.github.com/mockito/mockito/issues/3423">#3423</a>)</li>
<li><a href="https://github.com/mockito/mockito/commit/84f605d5d6da079dabd6ca7e29926c03f0dad45c"><code>84f605d</code></a> Fix a typo in InjectMocks (<a href="https://redirect.github.com/mockito/mockito/issues/3422">#3422</a>)</li>
<li><a href="https://github.com/mockito/mockito/commit/87e4a4fa85c84cbd09420c2c8e73bab3627708a7"><code>87e4a4f</code></a> Bump versions.bytebuddy from 1.14.18 to 1.14.19 (<a href="https://redirect.github.com/mockito/mockito/issues/3417">#3417</a>)</li>
<li><a href="https://github.com/mockito/mockito/commit/819cc6f6d867fe4aec06178e68b5faca16101e9c"><code>819cc6f</code></a> Bump androidx.test:runner from 1.6.1 to 1.6.2 (<a href="https://redirect.github.com/mockito/mockito/issues/3415">#3415</a>)</li>
<li><a href="https://github.com/mockito/mockito/commit/90df798c9623ef0c010c86319ddfeb5be64fe5f3"><code>90df798</code></a> Bump versions.junitJupiter from 5.10.3 to 5.11.0 (<a href="https://redirect.github.com/mockito/mockito/issues/3413">#3413</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/mockito/mockito/compare/v5.12.0...v5.13.0">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.mockito:mockito-junit-jupiter&package-manager=maven&previous-version=5.12.0&new-version=5.13.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/pom.xml b/java/pom.xml
index 577f23e6a719c..49e5348ef5af5 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -279,7 +279,7 @@ under the License.
     <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-junit-jupiter</artifactId>
-      <version>5.12.0</version>
+      <version>5.13.0</version>
       <scope>test</scope>
     </dependency>
     <dependency>

From 540b2ce393c24373fd35f649eecfbb4cd336e037 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 3 Sep 2024 15:02:30 +0900
Subject: [PATCH 59/63] MINOR: [Java] Bump com.github.luben:zstd-jni from
 1.5.6-4 to 1.5.6-5 in /java (#43921)

Bumps [com.github.luben:zstd-jni](https://github.com/luben/zstd-jni) from 1.5.6-4 to 1.5.6-5.
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/luben/zstd-jni/commit/ac14b057018be44a4b186d3682d4aae207928262"><code>ac14b05</code></a> Update the sbt syntax for the AIX target</li>
<li><a href="https://github.com/luben/zstd-jni/commit/1adcc4993395a519ed02cc87b7b255eceb63580c"><code>1adcc49</code></a> Try to bring back the AIX build</li>
<li><a href="https://github.com/luben/zstd-jni/commit/4e981883af0ac5c45bc6ca3b7479b1ac4c0a7715"><code>4e98188</code></a> v1.5.6-5</li>
<li><a href="https://github.com/luben/zstd-jni/commit/ee88b906af4f197609744bc5e98c7b35034f8bef"><code>ee88b90</code></a> Don't define <code>Automatic-Module-Name</code> in the Manifest</li>
<li>See full diff in <a href="https://github.com/luben/zstd-jni/compare/v1.5.6-4...v1.5.6-5">compare view</a></li>
</ul>
</details>
<br />

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.github.luben:zstd-jni&package-manager=maven&previous-version=1.5.6-4&new-version=1.5.6-5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/compression/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index 46ed8796423eb..f0d8e92c9a41d 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -55,7 +55,7 @@ under the License.
     <dependency>
       <groupId>com.github.luben</groupId>
       <artifactId>zstd-jni</artifactId>
-      <version>1.5.6-4</version>
+      <version>1.5.6-5</version>
     </dependency>
   </dependencies>
 </project>

From 41e1118f083f21ad2677c182ceb8629e861e8396 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 3 Sep 2024 15:16:01 +0900
Subject: [PATCH 60/63] MINOR: [Java] Bump org.apache.orc:orc-core from 1.9.2
 to 1.9.4 in /java (#43918)

Bumps org.apache.orc:orc-core from 1.9.2 to 1.9.4.

<details>
<summary>Most Recent Ignore Conditions Applied to This Pull Request</summary>

| Dependency Name | Ignore Conditions |
| --- | --- |
| org.apache.orc:orc-core | [>= 2.a, < 3] |
</details>

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.orc:orc-core&package-manager=maven&previous-version=1.9.2&new-version=1.9.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/adapter/orc/pom.xml | 2 +-
 java/dataset/pom.xml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index ec8ddbbb780df..d9cd2bb21a526 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -61,7 +61,7 @@ under the License.
     <dependency>
       <groupId>org.apache.orc</groupId>
       <artifactId>orc-core</artifactId>
-      <version>1.9.2</version>
+      <version>1.9.4</version>
       <scope>test</scope>
       <exclusions>
         <exclusion>
diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml
index f3384fabbed6d..7e649e3824b93 100644
--- a/java/dataset/pom.xml
+++ b/java/dataset/pom.xml
@@ -130,7 +130,7 @@ under the License.
     <dependency>
       <groupId>org.apache.orc</groupId>
       <artifactId>orc-core</artifactId>
-      <version>1.9.2</version>
+      <version>1.9.4</version>
       <scope>test</scope>
       <exclusions>
         <exclusion>

From 99bc23d901f14a5a5146defc030db995b6d46d63 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 3 Sep 2024 15:30:43 +0900
Subject: [PATCH 61/63] MINOR: [Java] Bump parquet.version from 1.14.1 to
 1.14.2 in /java (#43920)

Bumps `parquet.version` from 1.14.1 to 1.14.2.
Updates `org.apache.parquet:parquet-avro` from 1.14.1 to 1.14.2
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/apache/parquet-mr/releases">org.apache.parquet:parquet-avro's releases</a>.</em></p>
<blockquote>
<h2>Apache Parquet Java 1.14.2</h2>
<h2>What's Changed</h2>
<ul>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2949">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2957">GH-2956</a>: Use avro SchemaBuilder API to convert record</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2951">GH-2935</a>: Avoid double close of ParquetFileWriter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/issues/2992">GH-2992</a>: Gate LocalTimestamp references in AvroSchemaConverter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Write unencrypted Parquet files without Hadoop</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1350">PARQUET-2472</a>: Close in finally block in <code>ParquetFileWriter#end</code></li>
</ul>
<h2>Apache Parquet Java 1.14.2 RC2</h2>
<h2>What's Changed</h2>
<ul>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2949">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2957">GH-2956</a>: Use avro SchemaBuilder API to convert record</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2951">GH-2935</a>: Avoid double close of ParquetFileWriter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/issues/2992">GH-2992</a>: Gate LocalTimestamp references in AvroSchemaConverter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Write unencrypted Parquet files without Hadoop</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1350">PARQUET-2472</a>: Close in finally block in <code>ParquetFileWriter#end</code></li>
</ul>
<h2>Apache Parquet Java 1.14.2 RC1</h2>
<h2>What's Changed</h2>
<ul>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2949">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2957">GH-2956</a>: Use avro SchemaBuilder API to convert record</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2951">GH-2935</a>: Avoid double close of ParquetFileWriter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Write unencrypted Parquet files without Hadoop</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1350">PARQUET-2472</a>: Close in finally block in <code>ParquetFileWriter#end</code></li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/apache/parquet-java/blob/master/CHANGES.md">org.apache.parquet:parquet-avro's changelog</a>.</em></p>
<blockquote>

<h1>Parquet</h1>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/apache/parquet-java/commit/e7937382e7894f4780c90eb6f896c163cad4cd93"><code>e793738</code></a> [maven-release-plugin] prepare release apache-parquet-1.14.2rc2</li>
<li><a href="https://github.com/apache/parquet-java/commit/d04986ffbd2bc974d07c0db20afd6d2467235cbf"><code>d04986f</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2992">GH-2992</a>: Gate LocalTimestamp references in AvroSchemaConverter (<a href="https://redirect.github.com/apache/parquet-mr/issues/2993">#2993</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/7204a116bb4bc2fa5727e47253f75b67e600c7cb"><code>7204a11</code></a> [maven-release-plugin] prepare for next development iteration</li>
<li><a href="https://github.com/apache/parquet-java/commit/7a679f1fefb3c6a12602a33ba405264e4e4e3c40"><code>7a679f1</code></a> [maven-release-plugin] prepare release apache-parquet-1.14.2-rc1</li>
<li><a href="https://github.com/apache/parquet-java/commit/c88a3f8ab0dd2f4041b6249c807f43ed6e6d052a"><code>c88a3f8</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2948">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile...</li>
<li><a href="https://github.com/apache/parquet-java/commit/af4307b25349d78a4f401194f31786e1c4929b3f"><code>af4307b</code></a> PARQUET-1126: Write unencrypted Parquet files without Hadoop (<a href="https://redirect.github.com/apache/parquet-mr/issues/1376">#1376</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/0f3a615acb06dd8ab201f37e485aaac619e467ba"><code>0f3a615</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2956">GH-2956</a>: Use avro SchemaBuilder API to convert record (<a href="https://redirect.github.com/apache/parquet-mr/issues/2957">#2957</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/ca572cbad6ceb9fe303c057cae447c6fd9586f67"><code>ca572cb</code></a> Minor: <code>PARQUET-2472</code> is not on the branch (<a href="https://redirect.github.com/apache/parquet-mr/issues/2966">#2966</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/05f2e39cf3add09501c1534328db2452370a582c"><code>05f2e39</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2935">GH-2935</a>: Avoid double close of ParquetFileWriter (<a href="https://redirect.github.com/apache/parquet-mr/issues/2951">#2951</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/4241df31bfe4f4c90fd0c1907b96109ab16fa5e9"><code>4241df3</code></a> PARQUET-2472: Close in finally block in ParquetFileWriter#end (<a href="https://redirect.github.com/apache/parquet-mr/issues/1350">#1350</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/apache/parquet-mr/compare/apache-parquet-1.14.1...apache-parquet-1.14.2">compare view</a></li>
</ul>
</details>
<br />

Updates `org.apache.parquet:parquet-hadoop` from 1.14.1 to 1.14.2
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/apache/parquet-mr/releases">org.apache.parquet:parquet-hadoop's releases</a>.</em></p>
<blockquote>
<h2>Apache Parquet Java 1.14.2</h2>
<h2>What's Changed</h2>
<ul>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2949">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2957">GH-2956</a>: Use avro SchemaBuilder API to convert record</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2951">GH-2935</a>: Avoid double close of ParquetFileWriter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/issues/2992">GH-2992</a>: Gate LocalTimestamp references in AvroSchemaConverter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Write unencrypted Parquet files without Hadoop</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1350">PARQUET-2472</a>: Close in finally block in <code>ParquetFileWriter#end</code></li>
</ul>
<h2>Apache Parquet Java 1.14.2 RC2</h2>
<h2>What's Changed</h2>
<ul>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2949">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2957">GH-2956</a>: Use avro SchemaBuilder API to convert record</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2951">GH-2935</a>: Avoid double close of ParquetFileWriter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/issues/2992">GH-2992</a>: Gate LocalTimestamp references in AvroSchemaConverter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Write unencrypted Parquet files without Hadoop</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1350">PARQUET-2472</a>: Close in finally block in <code>ParquetFileWriter#end</code></li>
</ul>
<h2>Apache Parquet Java 1.14.2 RC1</h2>
<h2>What's Changed</h2>
<ul>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2949">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2957">GH-2956</a>: Use avro SchemaBuilder API to convert record</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/2951">GH-2935</a>: Avoid double close of ParquetFileWriter</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1376">PARQUET-1126</a>: Write unencrypted Parquet files without Hadoop</li>
<li><a href="https://redirect.github.com/apache/parquet-java/pull/1350">PARQUET-2472</a>: Close in finally block in <code>ParquetFileWriter#end</code></li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/apache/parquet-java/blob/master/CHANGES.md">org.apache.parquet:parquet-hadoop's changelog</a>.</em></p>
<blockquote>

<h1>Parquet</h1>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/apache/parquet-java/commit/e7937382e7894f4780c90eb6f896c163cad4cd93"><code>e793738</code></a> [maven-release-plugin] prepare release apache-parquet-1.14.2rc2</li>
<li><a href="https://github.com/apache/parquet-java/commit/d04986ffbd2bc974d07c0db20afd6d2467235cbf"><code>d04986f</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2992">GH-2992</a>: Gate LocalTimestamp references in AvroSchemaConverter (<a href="https://redirect.github.com/apache/parquet-mr/issues/2993">#2993</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/7204a116bb4bc2fa5727e47253f75b67e600c7cb"><code>7204a11</code></a> [maven-release-plugin] prepare for next development iteration</li>
<li><a href="https://github.com/apache/parquet-java/commit/7a679f1fefb3c6a12602a33ba405264e4e4e3c40"><code>7a679f1</code></a> [maven-release-plugin] prepare release apache-parquet-1.14.2-rc1</li>
<li><a href="https://github.com/apache/parquet-java/commit/c88a3f8ab0dd2f4041b6249c807f43ed6e6d052a"><code>c88a3f8</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2948">GH-2948</a>: Fix NPE when using the AvroParquetReader.Builder with LocalInputFile...</li>
<li><a href="https://github.com/apache/parquet-java/commit/af4307b25349d78a4f401194f31786e1c4929b3f"><code>af4307b</code></a> PARQUET-1126: Write unencrypted Parquet files without Hadoop (<a href="https://redirect.github.com/apache/parquet-mr/issues/1376">#1376</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/0f3a615acb06dd8ab201f37e485aaac619e467ba"><code>0f3a615</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2956">GH-2956</a>: Use avro SchemaBuilder API to convert record (<a href="https://redirect.github.com/apache/parquet-mr/issues/2957">#2957</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/ca572cbad6ceb9fe303c057cae447c6fd9586f67"><code>ca572cb</code></a> Minor: <code>PARQUET-2472</code> is not on the branch (<a href="https://redirect.github.com/apache/parquet-mr/issues/2966">#2966</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/05f2e39cf3add09501c1534328db2452370a582c"><code>05f2e39</code></a> <a href="https://redirect.github.com/apache/parquet-mr/issues/2935">GH-2935</a>: Avoid double close of ParquetFileWriter (<a href="https://redirect.github.com/apache/parquet-mr/issues/2951">#2951</a>)</li>
<li><a href="https://github.com/apache/parquet-java/commit/4241df31bfe4f4c90fd0c1907b96109ab16fa5e9"><code>4241df3</code></a> PARQUET-2472: Close in finally block in ParquetFileWriter#end (<a href="https://redirect.github.com/apache/parquet-mr/issues/1350">#1350</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/apache/parquet-mr/compare/apache-parquet-1.14.1...apache-parquet-1.14.2">compare view</a></li>
</ul>
</details>
<br />

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/dataset/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml
index 7e649e3824b93..a19e934f0de98 100644
--- a/java/dataset/pom.xml
+++ b/java/dataset/pom.xml
@@ -32,7 +32,7 @@ under the License.
 
   <properties>
     <arrow.cpp.build.dir>../../../cpp/release-build/</arrow.cpp.build.dir>
-    <parquet.version>1.14.1</parquet.version>
+    <parquet.version>1.14.2</parquet.version>
     <avro.version>1.12.0</avro.version>
   </properties>
 

From db9435f324d816c7ed7e0a18c9806ef9f51873a3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 3 Sep 2024 15:31:00 +0900
Subject: [PATCH 62/63] MINOR: [Java] Bump error_prone_core.version from 2.30.0
 to 2.31.0 in /java (#43923)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps `error_prone_core.version` from 2.30.0 to 2.31.0.
Updates `com.google.errorprone:error_prone_annotations` from 2.30.0 to 2.31.0
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/google/error-prone/releases">com.google.errorprone:error_prone_annotations's releases</a>.</em></p>
<blockquote>
<h2>Error Prone 2.31.0</h2>
<p>This is the last planned minor release of Error Prone that will support running on JDK 11, see <a href="https://redirect.github.com/google/error-prone/issues/3803">#3803</a>. Using Error Prone to compile code that is deployed to earlier versions will continue to be fully supported, but will require using JDK 17 or newer for compilation and setting <code>--release</code> or <code>-source</code>/<code>-target</code>/<code>-bootclasspath</code>.</p>
<p>Changes:</p>
<ul>
<li>Introduce <a href="https://github.com/google/error-prone/blob/2656f48902f6723f3147caa117372309dbc6c15f/type_annotations/src/main/java/com/google/errorprone/annotations/ThreadSafeTypeParameter.java"><code>@ ThreadSafeTypeParameter</code></a> with enforcement by <a href="https://errorprone.info/bugpattern/ThreadSafe">ThreadSafe</a></li>
<li>Improved support for latest JDK 24 EA builds</li>
<li>Error Prone is now distributed as a Multi-Release jar (<a href="https://redirect.github.com/google/error-prone/issues/3756">#3756</a>)</li>
</ul>
<p>New checks:</p>
<ul>
<li><a href="https://errorprone.info/bugpattern/AutoValueBoxedValues"><code>AutoValueBoxedValues</code></a>: AutoValue instances should not usually contain boxed types that are not Nullable. We recommend removing the unnecessary boxing.</li>
</ul>
<p>Full changelog: <a href="https://github.com/google/error-prone/compare/v2.30.0...v2.31.0">https://github.com/google/error-prone/compare/v2.30.0...v2.31.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/google/error-prone/commit/4294aac27cb0a5ec536fdfdbe0ec5227ac90c1a2"><code>4294aac</code></a> Release Error Prone 2.31.0</li>
<li><a href="https://github.com/google/error-prone/commit/5bf91fb051bce74517456a35e798c44c331d7da2"><code>5bf91fb</code></a> Replace <code>{@ link ThreadSafeTypeParameter}</code> with <code>{@ code ThreadSafeTypeParameter}</code></li>
<li><a href="https://github.com/google/error-prone/commit/a5a718974dd7d325025ea14c1492f113490d5cf8"><code>a5a7189</code></a> Replace <code>ComparisonChain</code> with a <code>Comparator</code> chain.</li>
<li><a href="https://github.com/google/error-prone/commit/7e9a10089b731fcff39d711aab25bc2b8b8d0c5a"><code>7e9a100</code></a> Make ThreadSafeTypeParameter useful in the open-source version of ErrorProne.</li>
<li><a href="https://github.com/google/error-prone/commit/b4cebef79651ae33277459240fc74d53e61ef3a9"><code>b4cebef</code></a> Fix typo noted by <a href="https://github.com/Stephan202"><code>@​Stephan202</code></a>.</li>
<li><a href="https://github.com/google/error-prone/commit/354104ec807269d79848d9d84b448f5e7e8e4315"><code>354104e</code></a> Remove <code>ThreadSafe.TypeParameter</code> now that it's been replaced by `ThreadSafeT...</li>
<li><a href="https://github.com/google/error-prone/commit/7542d36993acb6ac6c219c30e6bbac3ab8d0b793"><code>7542d36</code></a> Don't fire <code>CanIgnoreReturnValueSuggester</code> for simple <code>return param;</code> impleme...</li>
<li><a href="https://github.com/google/error-prone/commit/0a5a5b8bca44854904ac13b704f761a8c2a1277f"><code>0a5a5b8</code></a> Migrate <code>CollectionIncompatibleType</code> from the deprecated <code>withSignature</code> to `...</li>
<li><a href="https://github.com/google/error-prone/commit/78218f298883071c44f91fea30d8c2916f2da6df"><code>78218f2</code></a> Write more about <code>withSignature</code>.</li>
<li><a href="https://github.com/google/error-prone/commit/90d939069d5b59cc404da5ac48b25509b2ebef40"><code>90d9390</code></a> Mark some Kotlin ranges as Immutable.</li>
<li>Additional commits viewable in <a href="https://github.com/google/error-prone/compare/v2.30.0...v2.31.0">compare view</a></li>
</ul>
</details>
<br />

Updates `com.google.errorprone:error_prone_core` from 2.30.0 to 2.31.0
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/google/error-prone/releases">com.google.errorprone:error_prone_core's releases</a>.</em></p>
<blockquote>
<h2>Error Prone 2.31.0</h2>
<p>This is the last planned minor release of Error Prone that will support running on JDK 11, see <a href="https://redirect.github.com/google/error-prone/issues/3803">#3803</a>. Using Error Prone to compile code that is deployed to earlier versions will continue to be fully supported, but will require using JDK 17 or newer for compilation and setting <code>--release</code> or <code>-source</code>/<code>-target</code>/<code>-bootclasspath</code>.</p>
<p>Changes:</p>
<ul>
<li>Introduce <a href="https://github.com/google/error-prone/blob/2656f48902f6723f3147caa117372309dbc6c15f/type_annotations/src/main/java/com/google/errorprone/annotations/ThreadSafeTypeParameter.java"><code>@ ThreadSafeTypeParameter</code></a> with enforcement by <a href="https://errorprone.info/bugpattern/ThreadSafe">ThreadSafe</a></li>
<li>Improved support for latest JDK 24 EA builds</li>
<li>Error Prone is now distributed as a Multi-Release jar (<a href="https://redirect.github.com/google/error-prone/issues/3756">#3756</a>)</li>
</ul>
<p>New checks:</p>
<ul>
<li><a href="https://errorprone.info/bugpattern/AutoValueBoxedValues"><code>AutoValueBoxedValues</code></a>: AutoValue instances should not usually contain boxed types that are not Nullable. We recommend removing the unnecessary boxing.</li>
</ul>
<p>Full changelog: <a href="https://github.com/google/error-prone/compare/v2.30.0...v2.31.0">https://github.com/google/error-prone/compare/v2.30.0...v2.31.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="https://github.com/google/error-prone/commit/4294aac27cb0a5ec536fdfdbe0ec5227ac90c1a2"><code>4294aac</code></a> Release Error Prone 2.31.0</li>
<li><a href="https://github.com/google/error-prone/commit/5bf91fb051bce74517456a35e798c44c331d7da2"><code>5bf91fb</code></a> Replace <code>{@ link ThreadSafeTypeParameter}</code> with <code>{@ code ThreadSafeTypeParameter}</code></li>
<li><a href="https://github.com/google/error-prone/commit/a5a718974dd7d325025ea14c1492f113490d5cf8"><code>a5a7189</code></a> Replace <code>ComparisonChain</code> with a <code>Comparator</code> chain.</li>
<li><a href="https://github.com/google/error-prone/commit/7e9a10089b731fcff39d711aab25bc2b8b8d0c5a"><code>7e9a100</code></a> Make ThreadSafeTypeParameter useful in the open-source version of ErrorProne.</li>
<li><a href="https://github.com/google/error-prone/commit/b4cebef79651ae33277459240fc74d53e61ef3a9"><code>b4cebef</code></a> Fix typo noted by <a href="https://github.com/Stephan202"><code>@​Stephan202</code></a>.</li>
<li><a href="https://github.com/google/error-prone/commit/354104ec807269d79848d9d84b448f5e7e8e4315"><code>354104e</code></a> Remove <code>ThreadSafe.TypeParameter</code> now that it's been replaced by `ThreadSafeT...</li>
<li><a href="https://github.com/google/error-prone/commit/7542d36993acb6ac6c219c30e6bbac3ab8d0b793"><code>7542d36</code></a> Don't fire <code>CanIgnoreReturnValueSuggester</code> for simple <code>return param;</code> impleme...</li>
<li><a href="https://github.com/google/error-prone/commit/0a5a5b8bca44854904ac13b704f761a8c2a1277f"><code>0a5a5b8</code></a> Migrate <code>CollectionIncompatibleType</code> from the deprecated <code>withSignature</code> to `...</li>
<li><a href="https://github.com/google/error-prone/commit/78218f298883071c44f91fea30d8c2916f2da6df"><code>78218f2</code></a> Write more about <code>withSignature</code>.</li>
<li><a href="https://github.com/google/error-prone/commit/90d939069d5b59cc404da5ac48b25509b2ebef40"><code>90d9390</code></a> Mark some Kotlin ranges as Immutable.</li>
<li>Additional commits viewable in <a href="https://github.com/google/error-prone/compare/v2.30.0...v2.31.0">compare view</a></li>
</ul>
</details>
<br />

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

</details>

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/pom.xml b/java/pom.xml
index 49e5348ef5af5..81e652f462e02 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -107,7 +107,7 @@ under the License.
     <forkCount>2</forkCount>
     <checkstyle.version>10.17.0</checkstyle.version>
     <checkstyle.failOnViolation>true</checkstyle.failOnViolation>
-    <error_prone_core.version>2.30.0</error_prone_core.version>
+    <error_prone_core.version>2.31.0</error_prone_core.version>
     <mockito.core.version>5.11.0</mockito.core.version>
     <mockito.inline.version>5.2.0</mockito.inline.version>
     <checker.framework.version>3.46.0</checker.framework.version>

From 170c599cca72971c5db07305a73fd5d4885c1e61 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 3 Sep 2024 14:36:56 +0200
Subject: [PATCH 63/63] GH-40216: [Python][CI][Packaging] Upload nightly wheels
 to main label of scientific-python-nightly-wheels channel (#43932)

### Rationale for this change

Small follow-up on https://github.com/apache/arrow/pull/43862, correcting the `label` being used to upload the wheels. See https://github.com/apache/arrow/issues/40216#issuecomment-2325937999 for context.

* GitHub Issue: #40216

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 dev/tasks/macros.jinja | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index 63cb2fc6dd101..082d33b124f9f 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -189,7 +189,7 @@ env:
     shell: bash
     run: |
       python3 -m pip install git+https://github.com/Anaconda-Platform/anaconda-client.git@1.12.3
-      anaconda -t ${CROSSBOW_SCIENTIFIC_PYTHON_UPLOAD_TOKEN} upload --force -u scientific-python-nightly-wheels --label dev {{ pattern }}
+      anaconda -t ${CROSSBOW_SCIENTIFIC_PYTHON_UPLOAD_TOKEN} upload --force -u scientific-python-nightly-wheels --label main {{ pattern }}
     env:
       CROSSBOW_SCIENTIFIC_PYTHON_UPLOAD_TOKEN: {{ '${{ secrets.CROSSBOW_SCIENTIFIC_PYTHON_UPLOAD_TOKEN }}' }}
   {% endif %}