diff --git a/.env b/.env index 1358aafe824a6..c8c236d5ac44b 100644 --- a/.env +++ b/.env @@ -58,8 +58,8 @@ CUDA=11.2.2 DASK=latest DOTNET=8.0 GCC_VERSION="" -GO=1.21.8 -STATICCHECK=v0.4.7 +GO=1.22.6 +STATICCHECK=v0.5.1 HDFS=3.2.1 JDK=11 KARTOTHEK=latest @@ -71,6 +71,7 @@ NUMBA=latest NUMPY=latest PANDAS=latest PYTHON=3.8 +PYTHON_IMAGE_TAG=3.8 R=4.4 SPARK=master TURBODBC=latest @@ -95,7 +96,7 @@ VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release # ci/docker/python-wheel-windows-vs2019.dockerfile. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-06-18 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-08-06 # Use conanio/${CONAN_BASE}:${CONAN_VERSION} for "docker-compose run --rm conan". # See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index b016f7d11b9fa..2c46071010962 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -58,7 +58,7 @@ jobs: shell: bash run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: '3.9' - name: Install pygit2 binary wheel diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 1138c0a02f812..b7af4c5800835 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -41,7 +41,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index a82e1eb76660b..20bcfcb38da69 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -99,7 +99,6 @@ jobs: cat <<JSON >> "$GITHUB_OUTPUT" { "arch": "arm64v8", - "archery-use-legacy-docker-compose": "1", "clang-tools": "10", "image": "ubuntu-cpp", "llvm": "10", @@ -124,9 +123,6 @@ jobs: include: ${{ fromJson(needs.docker-targets.outputs.targets) }} env: ARCH: ${{ matrix.arch }} - # By default, use `docker compose` because docker-compose v1 is obsolete, - # except where the Docker client version is too old.
- ARCHERY_USE_LEGACY_DOCKER_COMPOSE: ${{ matrix.archery-use-legacy-docker-compose || '0' }} ARROW_SIMD_LEVEL: ${{ matrix.simd-level }} CLANG_TOOLS: ${{ matrix.clang-tools }} LLVM: ${{ matrix.llvm }} @@ -147,6 +143,7 @@ jobs: run: | sudo apt update sudo apt install -y --no-install-recommends python3 python3-dev python3-pip + python3 -m pip install -U pip - name: Setup Archery run: python3 -m pip install -e dev/archery[docker] - name: Execute Docker Build @@ -246,7 +243,7 @@ jobs: $(brew --prefix bash)/bin/bash \ ci/scripts/install_minio.sh latest ${ARROW_HOME} - name: Set up Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: 3.12 - name: Install Google Cloud Storage Testbench @@ -412,12 +409,10 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - # Don't use preinstalled Boost by empty BOOST_ROOT and - # -DBoost_NO_BOOST_CMAKE=ON + # Don't use preinstalled Boost by empty BOOST_ROOT BOOST_ROOT: "" ARROW_CMAKE_ARGS: >- -DARROW_PACKAGE_PREFIX=/${{ matrix.msystem_lower}} - -DBoost_NO_BOOST_CMAKE=ON -DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON # We can't use unity build because we don't have enough memory on # GitHub Actions. @@ -467,16 +462,18 @@ jobs: https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z chmod +x /usr/local/bin/minio.exe - name: Set up Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 + id: python-install with: python-version: 3.9 - name: Install Google Cloud Storage Testbench - shell: bash + shell: msys2 {0} + env: + PIPX_BIN_DIR: /usr/local/bin + PIPX_BASE_PYTHON: ${{ steps.python-install.outputs.python-path }} run: | ci/scripts/install_gcs_testbench.sh default - echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV - name: Test shell: msys2 {0} run: | - PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}" ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build" diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 6e8548dc960f4..c618350affbeb 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -108,7 +108,7 @@ jobs: with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Checkout Arrow diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index cc3ff6330746d..1cc8d993498b6 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -45,7 +45,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install pre-commit @@ -104,7 +104,7 @@ jobs: with: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: '3.12' - name: Install Ruby diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 25db1c39ad89e..1219f7526f9f2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -52,7 +52,7 @@ jobs: key: debian-docs-${{ hashFiles('cpp/**') }} restore-keys: debian-docs- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + 
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index ea7fe5d02d7b8..454affd7fa7f9 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -58,7 +58,7 @@ jobs: key: conda-docs-${{ hashFiles('cpp/**') }} restore-keys: conda-docs- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 20c78d86cb2a3..9b18b010a0cb9 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -62,13 +62,13 @@ jobs: { "arch-label": "AMD64", "arch": "amd64", - "go": "1.21", + "go": "1.22", "runs-on": "ubuntu-latest" }, { "arch-label": "AMD64", "arch": "amd64", - "go": "1.22", + "go": "1.23", "runs-on": "ubuntu-latest" } JSON @@ -78,15 +78,13 @@ jobs: { "arch-label": "ARM64", "arch": "arm64v8", - "archery-use-legacy-docker-compose": "1", - "go": "1.21", + "go": "1.22", "runs-on": ["self-hosted", "arm", "linux"] }, { "arch-label": "ARM64", "arch": "arm64v8", - "archery-use-legacy-docker-compose": "1", - "go": "1.22", + "go": "1.23", "runs-on": ["self-hosted", "arm", "linux"] } JSON @@ -106,9 +104,6 @@ jobs: include: ${{ fromJson(needs.docker-targets.outputs.targets) }} env: ARCH: ${{ matrix.arch }} - # By default, use Docker CLI because docker-compose v1 is obsolete, - # except where the Docker client version is too old. - ARCHERY_USE_LEGACY_DOCKER_COMPOSE: ${{ matrix.archery-use-legacy-docker-compose || '0' }} GO: ${{ matrix.go }} steps: - name: Checkout Arrow @@ -202,7 +197,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] env: GO: ${{ matrix.go }} steps: @@ -212,7 +207,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -243,7 +238,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] env: GO: ${{ matrix.go }} steps: @@ -252,7 +247,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -282,7 +277,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -315,7 +310,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -328,7 +323,7 @@ jobs: go-version: ${{ matrix.go }} cache: true cache-dependency-path: go/go.sum - - name: Install staticcheck + - name: Install staticcheck run: | . 
.env go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} @@ -344,7 +339,7 @@ jobs: github.event_name == 'push' && github.repository == 'apache/arrow' && github.ref_name == 'main' - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: '3.10' - name: Run Benchmarks @@ -373,7 +368,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.21', '1.22'] + go: ['1.22', '1.23'] env: ARROW_GO_TESTCGO: "1" steps: @@ -444,7 +439,7 @@ jobs: ci/scripts/msys2_setup.sh cgo - name: Get required Go version run: | - (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV + (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Update CGO Env vars shell: msys2 {0} run: | diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 43f8af0a600d8..3a6b568c5207f 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -89,7 +89,7 @@ jobs: key: conda-${{ hashFiles('cpp/**') }} restore-keys: conda- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 0317879b580ba..8560f0dd1cbe9 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -76,7 +76,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index c2bc679e681a2..f204d6459ae01 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -70,7 +70,7 @@ jobs: key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} restore-keys: java-jni-manylinux-2014- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -110,7 +110,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml index 72afb6dbf1c1d..0bf0c27288faf 100644 --- a/.github/workflows/java_nightly.yml +++ b/.github/workflows/java_nightly.yml @@ -58,7 +58,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 630bef61105f6..4ab9831924fb1 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -54,7 +54,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: 
python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml index 7dd06b6aeec09..bbb1a2d7228d0 100644 --- a/.github/workflows/pr_bot.yml +++ b/.github/workflows/pr_bot.yml @@ -82,7 +82,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 916db2580e371..b88ea7ce4f1ee 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -59,6 +59,7 @@ jobs: - conda-python-3.9-nopandas - conda-python-3.8-pandas-1.0 - conda-python-3.10-pandas-latest + - conda-python-3.10-no-numpy include: - name: conda-python-docs cache: conda-python-3.9 @@ -83,6 +84,11 @@ jobs: title: AMD64 Conda Python 3.10 Pandas latest python: "3.10" pandas: latest + - name: conda-python-3.10-no-numpy + cache: conda-python-3.10 + image: conda-python-no-numpy + title: AMD64 Conda Python 3.10 without NumPy + python: "3.10" env: PYTHON: ${{ matrix.python || 3.8 }} UBUNTU: ${{ matrix.ubuntu || 20.04 }} @@ -101,7 +107,7 @@ jobs: key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} restore-keys: ${{ matrix.cache }}- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -163,7 +169,7 @@ jobs: ARROW_BUILD_TESTS: OFF PYARROW_TEST_LARGE_MEMORY: ON # Current oldest supported version according to https://endoflife.date/macos - MACOSX_DEPLOYMENT_TARGET: 10.15 + MACOSX_DEPLOYMENT_TARGET: 12.0 steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -171,7 +177,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@v5.1.1 + uses: actions/setup-python@v5.2.0 with: python-version: '3.11' - name: Install Dependencies diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index bf7eb99e7e990..21afa4586b5a4 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -86,19 +86,18 @@ jobs: run: | sudo apt-get install devscripts - # replace the SHA with v2 once INFRA-26031 is resolved - - uses: r-lib/actions/setup-r@732fb28088814627972f1ccbacc02561178cf391 + - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true install-r: false - - uses: r-lib/actions/setup-r-dependencies@732fb28088814627972f1ccbacc02561178cf391 + - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::rcmdcheck needs: check working-directory: src/r - - uses: r-lib/actions/check-r-package@732fb28088814627972f1ccbacc02561178cf391 + - uses: r-lib/actions/check-r-package@v2 with: working-directory: src/r env: @@ -147,7 +146,7 @@ jobs: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}- ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -207,7 +206,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: 
actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -341,11 +340,11 @@ jobs: cd r/windows ls *.zip | xargs -n 1 unzip -uo rm -rf *.zip - - uses: r-lib/actions/setup-r@732fb28088814627972f1ccbacc02561178cf391 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.rversion }} Ncpus: 2 - - uses: r-lib/actions/setup-r-dependencies@732fb28088814627972f1ccbacc02561178cf391 + - uses: r-lib/actions/setup-r-dependencies@v2 env: GITHUB_PAT: "${{ github.token }}" with: diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 1ec071b6bbb5e..9817e41d3b61d 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -60,7 +60,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index e4d650e74a8ad..228bacb77e58a 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -83,7 +83,7 @@ jobs: key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby- - name: Setup Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.8 - name: Setup Archery @@ -406,7 +406,10 @@ jobs: -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" - name: Build C++ vcpkg dependencies run: | - vcpkg\vcpkg.exe install --triplet $env:VCPKG_TRIPLET --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed + vcpkg\vcpkg.exe install ` + --triplet $env:VCPKG_TRIPLET ` + --x-manifest-root cpp ` + --x-install-root build\cpp\vcpkg_installed - name: Build C++ shell: cmd run: | diff --git a/appveyor.yml b/appveyor.yml index 5954251d34733..9e4582f1d8d7f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,6 +24,7 @@ only_commits: - appveyor.yml - ci/appveyor* - ci/conda* + - ci/scripts/*.bat - cpp/ - format/ - python/ diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp index 80c47e336f872..23f59c9da69ad 100644 --- a/c_glib/arrow-flight-glib/client.cpp +++ b/c_glib/arrow-flight-glib/client.cpp @@ -33,10 +33,19 @@ G_BEGIN_DECLS * #GAFlightStreamReader is a class for reading record batches from a * server. * + * #GAFlightStreamWriter is a class for writing record batches to a + * server. + * + * #GAFlightMetadataReader is a class for reading metadata from a + * server. + * * #GAFlightCallOptions is a class for options of each call. * * #GAFlightClientOptions is a class for options of each client. * + * #GAFlightDoPutResult is a class that has gaflight_client_do_put() + * result. + * * #GAFlightClient is a class for Apache Arrow Flight client. * * Since: 5.0.0 @@ -56,6 +65,128 @@ gaflight_stream_reader_class_init(GAFlightStreamReaderClass *klass) { } +G_DEFINE_TYPE(GAFlightStreamWriter, + gaflight_stream_writer, + GAFLIGHT_TYPE_RECORD_BATCH_WRITER) + +static void +gaflight_stream_writer_init(GAFlightStreamWriter *object) +{ +} + +static void +gaflight_stream_writer_class_init(GAFlightStreamWriterClass *klass) +{ +} + +/** + * gaflight_stream_writer_done_writing: + * @writer: A #GAFlightStreamWriter. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error) +{ + auto flight_writer = std::static_pointer_cast<arrow::flight::FlightStreamWriter>( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + return garrow::check(error, + flight_writer->DoneWriting(), + "[flight-stream-writer][done-writing]"); +} + +struct GAFlightMetadataReaderPrivate +{ + arrow::flight::FlightMetadataReader *reader; +}; + +enum { + PROP_METADATA_READER_READER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightMetadataReader, + gaflight_metadata_reader, + G_TYPE_OBJECT) + +#define GAFLIGHT_METADATA_READER_GET_PRIVATE(object) \ + static_cast<GAFlightMetadataReaderPrivate *>( \ + gaflight_metadata_reader_get_instance_private(GAFLIGHT_METADATA_READER(object))) + +static void +gaflight_metadata_reader_finalize(GObject *object) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object); + delete priv->reader; + G_OBJECT_CLASS(gaflight_metadata_reader_parent_class)->finalize(object); +} + +static void +gaflight_metadata_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_METADATA_READER_READER: + priv->reader = + static_cast<arrow::flight::FlightMetadataReader *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_metadata_reader_init(GAFlightMetadataReader *object) +{ +} + +static void +gaflight_metadata_reader_class_init(GAFlightMetadataReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_metadata_reader_finalize; + gobject_class->set_property = gaflight_metadata_reader_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "reader", + nullptr, + nullptr, + static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_METADATA_READER_READER, spec); +} + +/** + * gaflight_metadata_reader_read: + * @reader: A #GAFlightMetadataReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The metadata on success, %NULL on error.
+ * + * Since: 18.0.0 + */ +GArrowBuffer * +gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error) +{ + auto flight_reader = gaflight_metadata_reader_get_raw(reader); + std::shared_ptr<arrow::Buffer> metadata; + if (garrow::check(error, + flight_reader->ReadMetadata(&metadata), + "[flight-metadata-reader][read]")) { + return garrow_buffer_new_raw(&metadata); + } else { + return nullptr; + } +} + typedef struct GAFlightCallOptionsPrivate_ { arrow::flight::FlightCallOptions options; @@ -385,6 +516,137 @@ gaflight_client_options_new(void) g_object_new(GAFLIGHT_TYPE_CLIENT_OPTIONS, NULL)); } +struct GAFlightDoPutResultPrivate +{ + GAFlightStreamWriter *writer; + GAFlightMetadataReader *reader; +}; + +enum { + PROP_DO_PUT_RESULT_RESULT = 1, + PROP_DO_PUT_RESULT_WRITER, + PROP_DO_PUT_RESULT_READER, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightDoPutResult, gaflight_do_put_result, G_TYPE_OBJECT) + +#define GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object) \ + static_cast<GAFlightDoPutResultPrivate *>( \ + gaflight_do_put_result_get_instance_private(GAFLIGHT_DO_PUT_RESULT(object))) + +static void +gaflight_do_put_result_dispose(GObject *object) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + if (priv->writer) { + g_object_unref(priv->writer); + priv->writer = nullptr; + } + + if (priv->reader) { + g_object_unref(priv->reader); + priv->reader = nullptr; + } + + G_OBJECT_CLASS(gaflight_do_put_result_parent_class)->dispose(object); +} + +static void +gaflight_do_put_result_init(GAFlightDoPutResult *object) +{ +} + +static void +gaflight_do_put_result_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DO_PUT_RESULT_RESULT: + { + auto result = static_cast<arrow::flight::FlightClient::DoPutResult *>( + g_value_get_pointer(value)); + priv->writer = gaflight_stream_writer_new_raw(result->writer.release()); + priv->reader = gaflight_metadata_reader_new_raw(result->reader.release()); + break; + } + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_do_put_result_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DO_PUT_RESULT_WRITER: + g_value_set_object(value, priv->writer); + break; + case PROP_DO_PUT_RESULT_READER: + g_value_set_object(value, priv->reader); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_do_put_result_class_init(GAFlightDoPutResultClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_do_put_result_dispose; + gobject_class->set_property = gaflight_do_put_result_set_property; + gobject_class->get_property = gaflight_do_put_result_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "result", + nullptr, + nullptr, + static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_RESULT, spec); + + /** + * GAFlightDoPutResult:writer: + * + * A writer to write record batches to. + * + * Since: 18.0.0 + */ + spec = g_param_spec_object("writer", + nullptr, + nullptr, + GAFLIGHT_TYPE_STREAM_WRITER, + static_cast<GParamFlags>(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_WRITER, spec); + + /** + * GAFlightDoPutResult:reader: + * + * A reader for application metadata from the server.
+ * + * Since: 18.0.0 + */ + spec = g_param_spec_object("reader", + nullptr, + nullptr, + GAFLIGHT_TYPE_METADATA_READER, + static_cast<GParamFlags>(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_READER, spec); +} + struct GAFlightClientPrivate { std::shared_ptr<arrow::flight::FlightClient> client; @@ -661,6 +923,51 @@ gaflight_client_do_get(GAFlightClient *client, return gaflight_stream_reader_new_raw(flight_reader.release(), TRUE); } +/** + * gaflight_client_do_put: + * @client: A #GAFlightClient. + * @descriptor: A #GAFlightDescriptor. + * @schema: A #GArrowSchema. + * @options: (nullable): A #GAFlightCallOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Upload data to a Flight described by the given descriptor. The + * caller must call garrow_record_batch_writer_close() on the + * returned stream once they are done writing. + * + * The reader and writer are linked; closing the writer will also + * close the reader. Use gaflight_stream_writer_done_writing() to + * only close the write side of the channel. + * + * Returns: (nullable) (transfer full): + * The #GAFlightDoPutResult holding a reader and a writer on success, + * %NULL on error. + * + * Since: 18.0.0 + */ +GAFlightDoPutResult * +gaflight_client_do_put(GAFlightClient *client, + GAFlightDescriptor *descriptor, + GArrowSchema *schema, + GAFlightCallOptions *options, + GError **error) +{ + auto flight_client = gaflight_client_get_raw(client); + auto flight_descriptor = gaflight_descriptor_get_raw(descriptor); + auto arrow_schema = garrow_schema_get_raw(schema); + arrow::flight::FlightCallOptions flight_default_options; + auto flight_options = &flight_default_options; + if (options) { + flight_options = gaflight_call_options_get_raw(options); + } + auto result = flight_client->DoPut(*flight_options, *flight_descriptor, arrow_schema); + if (!garrow::check(error, result, "[flight-client][do-put]")) { + return nullptr; + } + auto flight_result = std::move(*result); + return gaflight_do_put_result_new_raw(&flight_result); +} + G_END_DECLS GAFlightStreamReader * @@ -672,7 +979,28 @@ gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader, flight_reader, "is-owner", is_owner, - NULL)); + nullptr)); +} + +GAFlightStreamWriter * +gaflight_stream_writer_new_raw(arrow::flight::FlightStreamWriter *flight_writer) +{ + return GAFLIGHT_STREAM_WRITER( + g_object_new(GAFLIGHT_TYPE_STREAM_WRITER, "writer", flight_writer, nullptr)); +} + +GAFlightMetadataReader * +gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader) +{ + return GAFLIGHT_METADATA_READER( + g_object_new(GAFLIGHT_TYPE_METADATA_READER, "reader", flight_reader, nullptr)); +} + +arrow::flight::FlightMetadataReader * +gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(reader); + return priv->reader; } arrow::flight::FlightCallOptions * @@ -689,6 +1017,13 @@ gaflight_client_options_get_raw(GAFlightClientOptions *options) return &(priv->options); } +GAFlightDoPutResult * +gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result) +{ + return GAFLIGHT_DO_PUT_RESULT( + g_object_new(GAFLIGHT_TYPE_DO_PUT_RESULT, "result", flight_result, nullptr)); +} + std::shared_ptr<arrow::flight::FlightClient> gaflight_client_get_raw(GAFlightClient *client) { diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h index a91bbe55e3c04..12c5a06b810e1 100644 --- a/c_glib/arrow-flight-glib/client.h +++ 
b/c_glib/arrow-flight-glib/client.h @@ -35,6 +35,35 @@ struct _GAFlightStreamReaderClass { GAFlightRecordBatchReaderClass parent_class; }; +#define GAFLIGHT_TYPE_STREAM_WRITER (gaflight_stream_writer_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE(GAFlightStreamWriter, + gaflight_stream_writer, + GAFLIGHT, + STREAM_WRITER, + GAFlightRecordBatchWriter) +struct _GAFlightStreamWriterClass +{ + GAFlightRecordBatchWriterClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error); + +#define GAFLIGHT_TYPE_METADATA_READER (gaflight_metadata_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightMetadataReader, gaflight_metadata_reader, GAFLIGHT, METADATA_READER, GObject) +struct _GAFlightMetadataReaderClass +{ + GObjectClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +GArrowBuffer * +gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error); + #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( @@ -75,6 +104,15 @@ GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClientOptions * gaflight_client_options_new(void); +#define GAFLIGHT_TYPE_DO_PUT_RESULT (gaflight_do_put_result_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightDoPutResult, gaflight_do_put_result, GAFLIGHT, DO_PUT_RESULT, GObject) +struct _GAFlightDoPutResultClass +{ + GObjectClass parent_class; +}; + #define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightClient, gaflight_client, GAFLIGHT, CLIENT, GObject) @@ -124,4 +162,12 @@ gaflight_client_do_get(GAFlightClient *client, GAFlightCallOptions *options, GError **error); +GAFLIGHT_AVAILABLE_IN_18_0 +GAFlightDoPutResult * +gaflight_client_do_put(GAFlightClient *client, + GAFlightDescriptor *descriptor, + GArrowSchema *schema, + GAFlightCallOptions *options, + GError **error); + G_END_DECLS diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp index 185a28e6dc4bd..888f87ecb5732 100644 --- a/c_glib/arrow-flight-glib/client.hpp +++ b/c_glib/arrow-flight-glib/client.hpp @@ -28,6 +28,18 @@ GAFlightStreamReader * gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader, gboolean is_owner); +GAFLIGHT_EXTERN +GAFlightStreamWriter * +gaflight_stream_writer_new_raw(arrow::flight::FlightStreamWriter *flight_writer); + +GAFLIGHT_EXTERN +GAFlightMetadataReader * +gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader); + +GAFLIGHT_EXTERN +arrow::flight::FlightMetadataReader * +gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader); + GAFLIGHT_EXTERN arrow::flight::FlightCallOptions * gaflight_call_options_get_raw(GAFlightCallOptions *options); @@ -36,6 +48,10 @@ GAFLIGHT_EXTERN arrow::flight::FlightClientOptions * gaflight_client_options_get_raw(GAFlightClientOptions *options); +GAFLIGHT_EXTERN +GAFlightDoPutResult * +gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result); + GAFLIGHT_EXTERN std::shared_ptr<arrow::flight::FlightClient> gaflight_client_get_raw(GAFlightClient *client); diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp index efc544f10cf66..3deaf67cc14e8 100644 --- a/c_glib/arrow-flight-glib/common.cpp +++ b/c_glib/arrow-flight-glib/common.cpp @@ -48,7 +48,11 @@ G_BEGIN_DECLS * * #GAFlightStreamChunk is a class for a chunk in stream.
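A minimal caller-side sketch of the DoPut API added above (not part of the patch itself): it assumes an already-connected client, a descriptor, a schema, and a record batch to send; the writer and reader are fetched through the documented "writer"/"reader" properties, gaflight_record_batch_writer_write_record_batch() is the declaration that appears in common.h later in this diff, and error handling is abbreviated.

#include <arrow-flight-glib/arrow-flight-glib.h>

/* Hypothetical helper, for illustration only. */
static GArrowBuffer *
upload_one_batch(GAFlightClient *client,
                 GAFlightDescriptor *descriptor,
                 GArrowSchema *schema,
                 GArrowRecordBatch *batch)
{
  GError *error = NULL;
  GAFlightDoPutResult *result =
    gaflight_client_do_put(client, descriptor, schema, NULL, &error);
  if (!result) {
    g_print("do_put failed: %s\n", error->message);
    g_error_free(error);
    return NULL;
  }
  /* The writer and reader are exposed as readable GObject properties. */
  GAFlightStreamWriter *writer;
  GAFlightMetadataReader *reader;
  g_object_get(result, "writer", &writer, "reader", &reader, NULL);
  /* Stream one batch, close only the write side, then collect the
   * server's application metadata. Errors are ignored here for brevity. */
  gaflight_record_batch_writer_write_record_batch(
    GAFLIGHT_RECORD_BATCH_WRITER(writer), batch, NULL, NULL);
  gaflight_stream_writer_done_writing(writer, NULL);
  GArrowBuffer *metadata = gaflight_metadata_reader_read(reader, NULL);
  /* Per the gaflight_client_do_put() docs, the caller must close the
   * returned stream; closing the writer also closes the linked reader. */
  garrow_record_batch_writer_close(GARROW_RECORD_BATCH_WRITER(writer), NULL);
  g_object_unref(writer);
  g_object_unref(reader);
  g_object_unref(result);
  return metadata;
}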
* - * #GAFlightRecordBatchReader is a class for reading record batches. + * #GAFlightRecordBatchReader is an abstract class for reading record + * batches with metadata. + * + * #GAFlightRecordBatchWriter is an abstract class for + * writing record batches with metadata. * * Since: 5.0.0 */ @@ -1172,13 +1176,13 @@ typedef struct GAFlightRecordBatchReaderPrivate_ { } GAFlightRecordBatchReaderPrivate; enum { - PROP_READER = 1, - PROP_IS_OWNER, + PROP_RECORD_BATCH_READER_READER = 1, + PROP_RECORD_BATCH_READER_IS_OWNER, }; -G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader, - gaflight_record_batch_reader, - G_TYPE_OBJECT) +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader, + gaflight_record_batch_reader, + G_TYPE_OBJECT) #define GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(obj) \ static_cast<GAFlightRecordBatchReaderPrivate *>( \ @@ -1192,7 +1196,7 @@ gaflight_record_batch_reader_finalize(GObject *object) if (priv->is_owner) { delete priv->reader; } - G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object); + G_OBJECT_CLASS(gaflight_record_batch_reader_parent_class)->finalize(object); } static void @@ -1204,11 +1208,11 @@ gaflight_record_batch_reader_set_property(GObject *object, auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object); switch (prop_id) { - case PROP_READER: + case PROP_RECORD_BATCH_READER_READER: priv->reader = static_cast<arrow::flight::MetadataRecordBatchReader *>(g_value_get_pointer(value)); break; - case PROP_IS_OWNER: + case PROP_RECORD_BATCH_READER_IS_OWNER: priv->is_owner = g_value_get_boolean(value); break; default: @@ -1236,7 +1240,7 @@ gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass) nullptr, nullptr, static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_READER, spec); + g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER_READER, spec); spec = g_param_spec_boolean( "is-owner", nullptr, nullptr, TRUE, static_cast<GParamFlags>(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_IS_OWNER, spec); + g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER_IS_OWNER, spec); } /** @@ -1296,6 +1300,108 @@ gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError } } +G_DEFINE_ABSTRACT_TYPE(GAFlightRecordBatchWriter, + gaflight_record_batch_writer, + GARROW_TYPE_RECORD_BATCH_WRITER) + +static void +gaflight_record_batch_writer_init(GAFlightRecordBatchWriter *object) +{ +} + +static void +gaflight_record_batch_writer_class_init(GAFlightRecordBatchWriterClass *klass) +{ +} + +/** + * gaflight_record_batch_writer_begin: + * @writer: A #GAFlightRecordBatchWriter. + * @schema: A #GArrowSchema. + * @options: (nullable): A #GArrowWriteOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Begins writing data with the given schema. Only used with + * `DoExchange`. + * + * Returns: %TRUE on success, %FALSE on error.
+ * + * Since: 18.0.0 + */ +gboolean +gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer, + GArrowSchema *schema, + GArrowWriteOptions *options, + GError **error) +{ + auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + auto arrow_schema = garrow_schema_get_raw(schema); + arrow::ipc::IpcWriteOptions arrow_write_options; + if (options) { + arrow_write_options = *garrow_write_options_get_raw(options); + } else { + arrow_write_options = arrow::ipc::IpcWriteOptions::Defaults(); + } + return garrow::check(error, + flight_writer->Begin(arrow_schema, arrow_write_options), + "[flight-record-batch-writer][begin]"); +} + +/** + * gaflight_record_batch_writer_write_metadata: + * @writer: A #GAFlightRecordBatchWriter. + * @metadata: A #GArrowBuffer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Write metadata. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer, + GArrowBuffer *metadata, + GError **error) +{ + auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + auto arrow_metadata = garrow_buffer_get_raw(metadata); + return garrow::check(error, + flight_writer->WriteMetadata(arrow_metadata), + "[flight-record-batch-writer][write-metadata]"); +} + +/** + * gaflight_record_batch_writer_write_record_batch: + * @writer: A #GAFlightRecordBatchWriter. + * @record_batch: A #GArrowRecordBatch. + * @metadata: (nullable): A #GArrowBuffer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Write a record batch with metadata. + * + * Returns: %TRUE on success, %FALSE on error.
+ * + * Since: 18.0.0 + */ +gboolean +gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer, + GArrowRecordBatch *record_batch, + GArrowBuffer *metadata, + GError **error) +{ + auto flight_writer = std::static_pointer_cast<arrow::flight::MetadataRecordBatchWriter>( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_metadata = garrow_buffer_get_raw(metadata); + return garrow::check( + error, + flight_writer->WriteWithMetadata(*arrow_record_batch, arrow_metadata), + "[flight-record-batch-writer][write]"); +} + G_END_DECLS GAFlightCriteria * diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h index b1d89f79c357e..726132fe4921b 100644 --- a/c_glib/arrow-flight-glib/common.h +++ b/c_glib/arrow-flight-glib/common.h @@ -232,4 +232,36 @@ GAFLIGHT_AVAILABLE_IN_6_0 GArrowTable * gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError **error); +#define GAFLIGHT_TYPE_RECORD_BATCH_WRITER (gaflight_record_batch_writer_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchWriter, + gaflight_record_batch_writer, + GAFLIGHT, + RECORD_BATCH_WRITER, + GArrowRecordBatchWriter) +struct _GAFlightRecordBatchWriterClass +{ + GArrowRecordBatchWriterClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer, + GArrowSchema *schema, + GArrowWriteOptions *options, + GError **error); + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer, + GArrowBuffer *metadata, + GError **error); + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer, + GArrowRecordBatch *record_batch, + GArrowBuffer *metadata, + GError **error); + G_END_DECLS diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp index db56fff579baf..ae5a7703397dd 100644 --- a/c_glib/arrow-flight-glib/common.hpp +++ b/c_glib/arrow-flight-glib/common.hpp @@ -79,3 +79,7 @@ gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk); GAFLIGHT_EXTERN arrow::flight::MetadataRecordBatchReader * gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader); + +GAFLIGHT_EXTERN +arrow::flight::MetadataRecordBatchWriter * +gaflight_record_batch_writer_get_raw(GAFlightRecordBatchWriter *writer); diff --git a/c_glib/arrow-glib/writer.hpp b/c_glib/arrow-glib/writer.hpp index aa87ffe77d79b..1d85ac52f88d1 100644 --- a/c_glib/arrow-glib/writer.hpp +++ b/c_glib/arrow-glib/writer.hpp @@ -25,16 +25,20 @@ #include <arrow-glib/writer.h> +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchWriter * garrow_record_batch_writer_new_raw( std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer); +GARROW_AVAILABLE_IN_ALL std::shared_ptr<arrow::ipc::RecordBatchWriter> garrow_record_batch_writer_get_raw(GArrowRecordBatchWriter *writer); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamWriter * garrow_record_batch_stream_writer_new_raw( std::shared_ptr<arrow::ipc::RecordBatchStreamWriter> *arrow_writer); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileWriter * garrow_record_batch_file_writer_new_raw( std::shared_ptr<arrow::ipc::RecordBatchFileWriter> *arrow_writer); diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index f688fbb63a9ad..08a052e82f24d 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -46,7 +46,9 @@ set ARROW_CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON set ARROW_CXXFLAGS=/WX /MP @rem Install GCS testbench +set PIPX_BIN_DIR=C:\Windows\ call
%CD%\ci\scripts\install_gcs_testbench.bat +storage-testbench -h || exit /B @rem @rem Build and test Arrow C++ libraries (including Parquet) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index dff1f2224809a..f0084894e19dc 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,17 +42,19 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all +# We want to install the GCS testbench using the Conda base environment's Python, +# because the test environment's Python may later change. +ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3 +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + # Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to -# be on the path for the tests to run. +# be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_azurite.sh -# We want to install the GCS testbench using the same Python binary that the Conda code will use. -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index c602490d6b729..7ad2e5c0e8008 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -24,7 +24,7 @@ ARG maven=3.8.7 ARG node=16 ARG yarn=1.22 ARG jdk=11 -ARG go=1.21.8 +ARG go=1.22.6 # Install Archery and integration dependencies COPY ci/conda_env_archery.txt /arrow/ci/ diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile index 027fd589cecca..7e8dbe76f6248 100644 --- a/ci/docker/conda-python.dockerfile +++ b/ci/docker/conda-python.dockerfile @@ -32,11 +32,6 @@ RUN mamba install -q -y \ nomkl && \ mamba clean --all -# XXX The GCS testbench was already installed in conda-cpp.dockerfile, -# but we changed the installed Python version above, so we need to reinstall it. -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - ENV ARROW_ACERO=ON \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ diff --git a/ci/docker/debian-12-go.dockerfile b/ci/docker/debian-12-go.dockerfile index c958e6bdee211..4bc683c109eb8 100644 --- a/ci/docker/debian-12-go.dockerfile +++ b/ci/docker/debian-12-go.dockerfile @@ -16,8 +16,8 @@ # under the License. ARG arch=amd64 -ARG go=1.21 -ARG staticcheck=v0.4.7 +ARG go=1.22 +ARG staticcheck=v0.5.1 FROM ${arch}/golang:${go}-bookworm # FROM collects all the args, get back the staticcheck version arg diff --git a/ci/docker/python-wheel-manylinux-test.dockerfile b/ci/docker/python-wheel-manylinux-test.dockerfile index cdd0ae3ced756..09883f9780a36 100644 --- a/ci/docker/python-wheel-manylinux-test.dockerfile +++ b/ci/docker/python-wheel-manylinux-test.dockerfile @@ -16,15 +16,22 @@ # under the License. 
ARG arch -ARG python -FROM ${arch}/python:${python} - -# RUN pip install --upgrade pip +ARG python_image_tag +FROM ${arch}/python:${python_image_tag} # pandas doesn't provide wheel for aarch64 yet, so cache the compiled # test dependencies in a docker image COPY python/requirements-wheel-test.txt /arrow/python/ RUN pip install -r /arrow/python/requirements-wheel-test.txt +# Install the GCS testbench with the system Python +RUN apt-get update -y -q && \ + apt-get install -y -q \ + build-essential \ + python3-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ -RUN PYTHON=python /arrow/ci/scripts/install_gcs_testbench.sh default +ENV PIPX_PYTHON=/usr/bin/python3 PIPX_PIP_ARGS=--prefer-binary +RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index cb39667af1e10..42f088fd8a22a 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -103,7 +103,7 @@ RUN vcpkg install \ # Configure Python for applications running in the bash shell of this Dockerfile ARG python=3.8 ENV PYTHON_VERSION=${python} -RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-*) && \ +RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}) && \ echo "export PATH=$PYTHON_ROOT/bin:\$PATH" >> /etc/profile.d/python.sh SHELL ["/bin/bash", "-i", "-c"] diff --git a/ci/docker/python-wheel-windows-test-vs2019.dockerfile b/ci/docker/python-wheel-windows-test-vs2019.dockerfile index 32bbb55e82689..625ab25f848f2 100644 --- a/ci/docker/python-wheel-windows-test-vs2019.dockerfile +++ b/ci/docker/python-wheel-windows-test-vs2019.dockerfile @@ -35,15 +35,27 @@ RUN setx path "%path%;C:\Program Files\Git\usr\bin" RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ rm -rf Python* +# Install the GCS testbench using a well-known Python version. +# NOTE: cannot use pipx's `--fetch-missing-python` because of +# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves. 
+RUN choco install -r -y --pre --no-progress python --version=3.11.9 +ENV PIPX_BIN_DIR=C:\\Windows\\ +ENV PIPX_PYTHON="C:\Python311\python.exe" +COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/ +RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \ + storage-testbench -h + # Define the full version number otherwise choco falls back to patch number 0 (3.8 => 3.8.0) ARG python=3.8 -RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \ - (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \ - (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \ - (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.5" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.0" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") +RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10") & \ + (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13") & \ + (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11") & \ + (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9") & \ + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1") # Install archiver to extract xz archives -RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION% & \ - python -m pip install --no-cache-dir -U pip setuptools & \ +RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% && \ choco install --no-progress -r -y archiver + +ENV PYTHON=$python diff --git a/ci/docker/python-wheel-windows-vs2019.dockerfile b/ci/docker/python-wheel-windows-vs2019.dockerfile index ff42de939d91f..5a17e3e4c52c2 100644 --- a/ci/docker/python-wheel-windows-vs2019.dockerfile +++ b/ci/docker/python-wheel-windows-vs2019.dockerfile @@ -83,9 +83,10 @@ ARG python=3.8 RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \ (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \ (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \ - (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.5" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.0" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") -RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION% + (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \ + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts") +RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% RUN python -m pip install -U pip setuptools COPY python/requirements-wheel-build.txt arrow/python/ diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index e17c0306f115d..4d867a448c994 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ wget && \ apt-get clean && \ diff --git 
a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index 341d8a87e8661..f26cad51f0983 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ wget && \ apt-get clean && \ diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile index a995ab2a8bc2d..125bc7ba46a81 100644 --- a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ tzdata-legacy \ wget && \ diff --git a/ci/scripts/install_gcs_testbench.bat b/ci/scripts/install_gcs_testbench.bat index b03d0c2ad6608..f54f98db7cac8 100644 --- a/ci/scripts/install_gcs_testbench.bat +++ b/ci/scripts/install_gcs_testbench.bat @@ -17,9 +17,18 @@ @echo on -set GCS_TESTBENCH_VERSION="v0.36.0" +set GCS_TESTBENCH_VERSION="v0.40.0" + +set PIPX_FLAGS=--verbose +if NOT "%PIPX_PYTHON%"=="" ( + set PIPX_FLAGS=--python %PIPX_PYTHON% %PIPX_FLAGS% +) + +python -m pip install -U pipx || exit /B 1 @REM Install GCS testbench %GCS_TESTBENCH_VERSION% -python -m pip install ^ +pipx install %PIPX_FLAGS% ^ "https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^ || exit /B 1 + +pipx list --verbose diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 2090290c99322..48a5858a358c9 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -set -e +set -ex if [ "$#" -ne 1 ]; then echo "Usage: $0 " @@ -34,15 +34,26 @@ case "$(uname -m)" in ;; esac -# On newer pythons install into the system will fail, so override that -export PIP_BREAK_SYSTEM_PACKAGES=1 - version=$1 if [[ "${version}" -eq "default" ]]; then version="v0.39.0" - # Latests versions of Testbench require newer setuptools - ${PYTHON:-python3} -m pip install --upgrade setuptools fi -${PYTHON:-python3} -m pip install \ +# The Python to install pipx with +: ${PIPX_BASE_PYTHON:=$(which python3)} +# The Python to install the GCS testbench with +: ${PIPX_PYTHON:=${PIPX_BASE_PYTHON:-$(which python3)}} + +export PIP_BREAK_SYSTEM_PACKAGES=1 +${PIPX_BASE_PYTHON} -m pip install -U pipx + +pipx_flags=(--verbose --python ${PIPX_PYTHON}) +if [[ $(id -un) == "root" ]]; then + # Install globally as /root/.local/bin is typically not in $PATH + pipx_flags+=(--global) +fi +if [[ -n "${PIPX_PIP_ARGS}" ]]; then + pipx_flags+=(--pip-args "'${PIPX_PIP_ARGS}'") +fi +${PIPX_BASE_PYTHON} -m pipx install ${pipx_flags[@]} \ "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh index 5f962f02b911b..42d0e9ca179fb 100755 --- a/ci/scripts/install_python.sh +++ b/ci/scripts/install_python.sh @@ -28,8 +28,9 @@ declare -A versions versions=([3.8]=3.8.10 [3.9]=3.9.13 [3.10]=3.10.11 - [3.11]=3.11.5 - [3.12]=3.12.0) + [3.11]=3.11.9 + [3.12]=3.12.4 + [3.13]=3.13.0) if [ "$#" -ne 2 ]; then echo "Usage: $0 " @@ -46,7 +47,14 @@ full_version=${versions[$2]} if [ $platform = "macOS" ]; then echo "Downloading Python installer..." 
- if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ] || [ "$version" = "3.11" ] || [ "$version" = "3.12" ]; then + if [ "$version" = "3.13" ]; + then + fname="python-${full_version}rc1-macos11.pkg" + elif [ "$(uname -m)" = "arm64" ] || \ + [ "$version" = "3.10" ] || \ + [ "$version" = "3.11" ] || \ + [ "$version" = "3.12" ]; + then fname="python-${full_version}-macos11.pkg" else fname="python-${full_version}-macosx10.9.pkg" diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 3ed9d5d8dd12f..92b962f1740bd 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -34,7 +34,7 @@ rm -rf ${source_dir}/python/pyarrow/*.so.* echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ===" export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}" -export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.15} +export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-12.0} export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)} if [ $arch = "arm64" ]; then @@ -48,13 +48,11 @@ fi echo "=== (${PYTHON_VERSION}) Install Python build dependencies ===" export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])') -export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}" pip install \ --upgrade \ --only-binary=:all: \ --target $PIP_SITE_PACKAGES \ - --platform $PIP_TARGET_PLATFORM \ -r ${source_dir}/python/requirements-wheel-build.txt pip install "delocate>=0.10.3" diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index 87c0bb1252024..cac3f18434b6c 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -37,28 +37,32 @@ set PYARROW_TEST_TENSORFLOW=ON set ARROW_TEST_DATA=C:\arrow\testing\data set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data -@REM Install testing dependencies -pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1 +@REM List installed Pythons +py -0p + +set PYTHON_CMD=py -%PYTHON% -@REM Install GCS testbench -call "C:\arrow\ci\scripts\install_gcs_testbench.bat" +%PYTHON_CMD% -m pip install -U pip setuptools || exit /B 1 + +@REM Install testing dependencies +%PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1 @REM Install the built wheels -python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 +%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 @REM Test that the modules are importable -python -c "import pyarrow" || exit /B 1 -python -c "import pyarrow._gcsfs" || exit /B 1 -python -c "import pyarrow._hdfs" || exit /B 1 -python -c "import pyarrow._s3fs" || exit /B 1 -python -c "import pyarrow.csv" || exit /B 1 -python -c "import pyarrow.dataset" || exit /B 1 -python -c "import pyarrow.flight" || exit /B 1 -python -c "import pyarrow.fs" || exit /B 1 -python -c "import pyarrow.json" || exit /B 1 -python -c "import pyarrow.orc" || exit /B 1 -python -c "import pyarrow.parquet" || exit /B 1 -python -c "import pyarrow.substrait" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._gcsfs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._hdfs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._s3fs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.csv" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.dataset" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.flight" || 
exit /B 1 +%PYTHON_CMD% -c "import pyarrow.fs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.json" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.orc" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.parquet" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1 @rem Download IANA Timezone Database for ORC C++ curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B @@ -67,4 +71,4 @@ arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo @REM Execute unittest -pytest -r s --pyargs pyarrow || exit /B 1 +%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1 diff --git a/ci/vcpkg/arm64-osx-static-debug.cmake b/ci/vcpkg/arm64-osx-static-debug.cmake index f511819a2edd9..32ae7bc433489 100644 --- a/ci/vcpkg/arm64-osx-static-debug.cmake +++ b/ci/vcpkg/arm64-osx-static-debug.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES arm64) -set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/arm64-osx-static-release.cmake b/ci/vcpkg/arm64-osx-static-release.cmake index 43d65efb2651b..dde46cd763afe 100644 --- a/ci/vcpkg/arm64-osx-static-release.cmake +++ b/ci/vcpkg/arm64-osx-static-release.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES arm64) -set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE release) diff --git a/ci/vcpkg/universal2-osx-static-debug.cmake b/ci/vcpkg/universal2-osx-static-debug.cmake index 8abc1ebf838f1..d3ef0d67eb719 100644 --- a/ci/vcpkg/universal2-osx-static-debug.cmake +++ b/ci/vcpkg/universal2-osx-static-debug.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") -set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/universal2-osx-static-release.cmake b/ci/vcpkg/universal2-osx-static-release.cmake index 2eb36c15175b2..3018aa93e5fbb 100644 --- a/ci/vcpkg/universal2-osx-static-release.cmake +++ b/ci/vcpkg/universal2-osx-static-release.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") -set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE release) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a1e3138da9e0b..5ead9e4b063cd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -84,7 +84,7 @@ set(ARROW_VERSION "18.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") # if no build type is specified, default to release builds -if(NOT DEFINED CMAKE_BUILD_TYPE) +if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build.") diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 41466a1c22404..755887314d110 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -303,7 +303,10 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_IPC) define_option(ARROW_AZURE - "Build Arrow with Azure support (requires the Azure SDK for C++)" OFF) + "Build Arrow with Azure support (requires the Azure SDK 
for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_BUILD_UTILITIES "Build Arrow commandline utilities" OFF) @@ -346,9 +349,16 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_WITH_UTF8PROC) define_option(ARROW_GCS - "Build Arrow with GCS support (requires the GCloud SDK for C++)" OFF) + "Build Arrow with GCS support (requires the GCloud SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) - define_option(ARROW_HDFS "Build the Arrow HDFS bridge" OFF) + define_option(ARROW_HDFS + "Build the Arrow HDFS bridge" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_IPC "Build the Arrow IPC extensions" ON) @@ -398,7 +408,11 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_HDFS ARROW_JSON) - define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF) + define_option(ARROW_S3 + "Build Arrow with S3 support (requires the AWS SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_SKYHOOK "Build the Skyhook libraries" diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 63e2c036c9a6f..b31037a973279 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -259,7 +259,7 @@ macro(resolve_dependency DEPENDENCY_NAME) IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG) - set(multi_value_args COMPONENTS PC_PACKAGE_NAMES) + set(multi_value_args COMPONENTS OPTIONAL_COMPONENTS PC_PACKAGE_NAMES) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -287,6 +287,9 @@ macro(resolve_dependency DEPENDENCY_NAME) if(ARG_COMPONENTS) list(APPEND FIND_PACKAGE_ARGUMENTS COMPONENTS ${ARG_COMPONENTS}) endif() + if(ARG_OPTIONAL_COMPONENTS) + list(APPEND FIND_PACKAGE_ARGUMENTS OPTIONAL_COMPONENTS ${ARG_OPTIONAL_COMPONENTS}) + endif() if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") find_package(${FIND_PACKAGE_ARGUMENTS}) set(COMPATIBLE ${${PACKAGE_NAME}_FOUND}) @@ -1289,15 +1292,19 @@ if(ARROW_USE_BOOST) set(Boost_USE_STATIC_LIBS ON) endif() if(ARROW_BOOST_REQUIRE_LIBRARY) - set(ARROW_BOOST_COMPONENTS system filesystem) + set(ARROW_BOOST_COMPONENTS filesystem system) + set(ARROW_BOOST_OPTIONAL_COMPONENTS process) else() set(ARROW_BOOST_COMPONENTS) + set(ARROW_BOOST_OPTIONAL_COMPONENTS) endif() resolve_dependency(Boost REQUIRED_VERSION ${ARROW_BOOST_REQUIRED_VERSION} COMPONENTS ${ARROW_BOOST_COMPONENTS} + OPTIONAL_COMPONENTS + ${ARROW_BOOST_OPTIONAL_COMPONENTS} IS_RUNTIME_DEPENDENCY # libarrow.so doesn't depend on libboost*. FALSE) @@ -1316,14 +1323,35 @@ if(ARROW_USE_BOOST) endif() endforeach() - if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - # boost/process/detail/windows/handle_workaround.hpp doesn't work - # without BOOST_USE_WINDOWS_H with MinGW because MinGW doesn't - # provide __kernel_entry without winternl.h. 
- # - # See also: - # https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp - target_compile_definitions(Boost::headers INTERFACE "BOOST_USE_WINDOWS_H=1") + if(TARGET Boost::process) + # Boost >= 1.86 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V1") + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + else() + # Boost < 1.86 + add_library(Boost::process INTERFACE IMPORTED) + if(TARGET Boost::filesystem) + target_link_libraries(Boost::process INTERFACE Boost::filesystem) + endif() + if(TARGET Boost::system) + target_link_libraries(Boost::process INTERFACE Boost::system) + endif() + if(TARGET Boost::headers) + target_link_libraries(Boost::process INTERFACE Boost::headers) + endif() + if(Boost_VERSION VERSION_GREATER_EQUAL 1.80) + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + # Boost < 1.86 has a bug that + # boost::process::v2::process_environment::on_setup() isn't + # defined. We need to build Boost Process source to define it. + # + # See also: + # https://github.com/boostorg/process/issues/312 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_NEED_SOURCE") + if(WIN32) + target_link_libraries(Boost::process INTERFACE bcrypt ntdll) + endif() + endif() endif() message(STATUS "Boost include dir: ${Boost_INCLUDE_DIRS}") diff --git a/cpp/examples/minimal_build/CMakeLists.txt b/cpp/examples/minimal_build/CMakeLists.txt index b4a7cde938c87..95dad34221add 100644 --- a/cpp/examples/minimal_build/CMakeLists.txt +++ b/cpp/examples/minimal_build/CMakeLists.txt @@ -30,7 +30,7 @@ endif() # We require a C++17 compliant compiler set(CMAKE_CXX_STANDARD_REQUIRED ON) -if(NOT DEFINED CMAKE_BUILD_TYPE) +if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index fb7253b6fd69d..01ac813f4713b 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -373,7 +373,10 @@ set(ARROW_SRCS config.cc datum.cc device.cc + device_allocation_type_set.cc extension_type.cc + extension/bool8.cc + extension/uuid.cc pretty_print.cc record_batch.cc result.cc @@ -641,9 +644,13 @@ else() endif() set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST}) -set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON) -set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static - ${ARROW_GTEST_GTEST}) +set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process) +set(ARROW_TESTING_STATIC_LINK_LIBS + arrow::flatbuffers + RapidJSON + Boost::process + arrow_static + ${ARROW_GTEST_GTEST}) set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) # that depend on gtest @@ -664,9 +671,10 @@ set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc testing/fixed_width_test_util.cc + testing/generator.cc testing/gtest_util.cc + testing/process.cc testing/random.cc - testing/generator.cc testing/util.cc) # @@ -906,7 +914,6 @@ endif() if(ARROW_JSON) arrow_add_object_library(ARROW_JSON - extension/bool8.cc extension/fixed_shape_tensor.cc extension/opaque.cc json/options.cc @@ -1225,6 +1232,7 @@ add_subdirectory(testing) add_subdirectory(array) add_subdirectory(c) add_subdirectory(compute) +add_subdirectory(extension) add_subdirectory(io) add_subdirectory(tensor) add_subdirectory(util) @@ -1267,7 +1275,6 @@ endif() if(ARROW_JSON) 
 add_subdirectory(json)
-  add_subdirectory(extension)
 endif()
 
 if(ARROW_ORC)
diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc b/cpp/src/arrow/acero/aggregate_node_test.cc
index d398fb24b73d5..c623271db9fb4 100644
--- a/cpp/src/arrow/acero/aggregate_node_test.cc
+++ b/cpp/src/arrow/acero/aggregate_node_test.cc
@@ -210,5 +210,57 @@ TEST(GroupByNode, NoSkipNulls) {
   AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch},
                                       out_batches.batches);
 }
 
+TEST(ScalarAggregateNode, AnyAll) {
+  // GH-43768: boolean_any and boolean_all with constant input should work well
+  // when min_count != 0.
+  std::shared_ptr<Schema> in_schema = schema({field("not_used", int32())});
+  std::shared_ptr<Schema> out_schema = schema({field("agg_out", boolean())});
+  struct AnyAllCase {
+    std::string batches_json;
+    Expression literal;
+    std::string expected_json;
+    bool skip_nulls = false;
+    uint32_t min_count = 2;
+  };
+  std::vector<AnyAllCase> cases{
+      {"[[42], [42], [42], [42]]", literal(true), "[[true]]"},
+      {"[[42], [42], [42], [42]]", literal(false), "[[false]]"},
+      {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]"},
+      {"[[42]]", literal(true), "[[null]]"},
+      {"[[42], [42], [42]]", literal(true), "[[true]]"},
+      {"[[42], [42], [42]]", literal(true), "[[null]]", /*skip_nulls=*/false,
+       /*min_count=*/4},
+      {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]",
+       /*skip_nulls=*/true},
+  };
+  for (const AnyAllCase& any_all_case : cases) {
+    for (auto func_name : {"any", "all"}) {
+      std::vector<ExecBatch> batches{
+          ExecBatchFromJSON({int32()}, any_all_case.batches_json)};
+      std::vector<Aggregate> aggregates = {
+          Aggregate(func_name,
+                    std::make_shared<ScalarAggregateOptions>(
+                        /*skip_nulls=*/any_all_case.skip_nulls,
+                        /*min_count=*/any_all_case.min_count),
+                    FieldRef("literal"))};
+
+      // And a projection to make the input including a Scalar Boolean
+      Declaration plan = Declaration::Sequence(
+          {{"exec_batch_source", ExecBatchSourceNodeOptions(in_schema, batches)},
+           {"project", ProjectNodeOptions({any_all_case.literal}, {"literal"})},
+           {"aggregate", AggregateNodeOptions(aggregates)}});
+
+      ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema out_batches,
+                           DeclarationToExecBatches(plan));
+
+      ExecBatch expected_batch =
+          ExecBatchFromJSON({boolean()}, any_all_case.expected_json);
+
+      AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch},
+                                          out_batches.batches);
+    }
+  }
+}
+
 }  // namespace acero
 }  // namespace arrow
diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc
index 470960b1c5062..e3e37e249e6a3 100644
--- a/cpp/src/arrow/acero/hash_join_benchmark.cc
+++ b/cpp/src/arrow/acero/hash_join_benchmark.cc
@@ -104,7 +104,7 @@ class JoinBenchmark {
       key_cmp.push_back(JoinKeyCmp::EQ);
     }
 
-    for (size_t i = 0; i < settings.build_payload_types.size(); i++) {
+    for (size_t i = 0; i < settings.probe_payload_types.size(); i++) {
       std::string name = "lp" + std::to_string(i);
       DCHECK_OK(l_schema_builder.AddField(field(name, settings.probe_payload_types[i])));
     }
@@ -279,7 +279,7 @@ static void BM_HashJoinBasic_MatchesPerRow(benchmark::State& st) {
   settings.cardinality = 1.0 / static_cast<double>(st.range(0));
   settings.num_build_batches = static_cast<int>(st.range(1));
-  settings.num_probe_batches = settings.num_probe_batches;
+  settings.num_probe_batches = settings.num_build_batches;
 
   HashJoinBasicBenchmarkImpl(st, settings);
 }
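The ScalarAggregateNode.AnyAll test earlier in this diff pins down the min_count semantics fixed by GH-43768: a constant (scalar) input consumed over a batch of length N now counts as N observed values rather than one. A minimal standalone sketch of the same semantics exercised on a plain array; the RunAnyExample helper is illustrative, not from the patch:

#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <iostream>

arrow::Status RunAnyExample() {
  arrow::BooleanBuilder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({true, true, true}));  // 3 non-null values
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> values, builder.Finish());

  // min_count=2 is satisfied by 3 non-null values, so "any" yields true.
  arrow::compute::ScalarAggregateOptions satisfied(/*skip_nulls=*/false,
                                                   /*min_count=*/2);
  ARROW_ASSIGN_OR_RAISE(arrow::Datum result,
                        arrow::compute::CallFunction("any", {values}, &satisfied));
  std::cout << result.scalar()->ToString() << std::endl;  // true

  // min_count=4 is not satisfied by only 3 values, so the result degrades to null.
  arrow::compute::ScalarAggregateOptions unsatisfied(/*skip_nulls=*/false,
                                                     /*min_count=*/4);
  ARROW_ASSIGN_OR_RAISE(result,
                        arrow::compute::CallFunction("any", {values}, &unsatisfied));
  std::cout << result.scalar()->ToString() << std::endl;  // null
  return arrow::Status::OK();
}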
@@ -291,7 +291,7 @@ static void BM_HashJoinBasic_PayloadSize(benchmark::State& st) {
   settings.cardinality = 1.0 / static_cast<double>(st.range(1));
   settings.num_build_batches = static_cast<int>(st.range(2));
-  settings.num_probe_batches = settings.num_probe_batches;
+  settings.num_probe_batches = settings.num_build_batches;
 
   HashJoinBasicBenchmarkImpl(st, settings);
 }
diff --git a/cpp/src/arrow/acero/hash_join_dict.cc b/cpp/src/arrow/acero/hash_join_dict.cc
index 3aef08e6e9ccf..8db9dddb2c3a0 100644
--- a/cpp/src/arrow/acero/hash_join_dict.cc
+++ b/cpp/src/arrow/acero/hash_join_dict.cc
@@ -225,21 +225,20 @@ Status HashJoinDictBuild::Init(ExecContext* ctx, std::shared_ptr<Array> dictiona
     return Status::OK();
   }
 
-  dictionary_ = dictionary;
+  dictionary_ = std::move(dictionary);
 
   // Initialize encoder
   RowEncoder encoder;
-  std::vector<TypeHolder> encoder_types;
-  encoder_types.emplace_back(value_type_);
+  std::vector<TypeHolder> encoder_types{value_type_};
   encoder.Init(encoder_types, ctx);
 
   // Encode all dictionary values
-  int64_t length = dictionary->data()->length;
+  int64_t length = dictionary_->data()->length;
   if (length >= std::numeric_limits<int32_t>::max()) {
     return Status::Invalid(
         "Dictionary length in hash join must fit into signed 32-bit integer.");
   }
-  RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary->data()}, length)));
+  RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary_->data()}, length)));
 
   std::vector<int32_t> entries_to_take;
diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc
index 67f902e64be93..80dd163ced740 100644
--- a/cpp/src/arrow/acero/hash_join_node.cc
+++ b/cpp/src/arrow/acero/hash_join_node.cc
@@ -61,30 +61,30 @@ Result<std::vector<FieldRef>> HashJoinSchema::ComputePayload(
     const std::vector<FieldRef>& filter, const std::vector<FieldRef>& keys) {
   // payload = (output + filter) - keys, with no duplicates
   std::unordered_set<int> payload_fields;
-  for (auto ref : output) {
+  for (const auto& ref : output) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.insert(match[0]);
   }
 
-  for (auto ref : filter) {
+  for (const auto& ref : filter) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.insert(match[0]);
   }
 
-  for (auto ref : keys) {
+  for (const auto& ref : keys) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     payload_fields.erase(match[0]);
   }
 
   std::vector<FieldRef> payload_refs;
-  for (auto ref : output) {
+  for (const auto& ref : output) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     if (payload_fields.find(match[0]) != payload_fields.end()) {
       payload_refs.push_back(ref);
       payload_fields.erase(match[0]);
     }
   }
-  for (auto ref : filter) {
+  for (const auto& ref : filter) {
     ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema));
     if (payload_fields.find(match[0]) != payload_fields.end()) {
       payload_refs.push_back(ref);
@@ -198,7 +198,7 @@ Status HashJoinSchema::ValidateSchemas(JoinType join_type, const Schema& left_sc
     return Status::Invalid("Different number of key fields on left (", left_keys.size(),
                            ") and right (", right_keys.size(), ") side of the join");
   }
-  if (left_keys.size() < 1) {
+  if (left_keys.empty()) {
     return Status::Invalid("Join key cannot be empty");
   }
   for (size_t i = 0; i < left_keys.size() + right_keys.size(); ++i) {
@@ -432,7 +432,7 @@ Status HashJoinSchema::CollectFilterColumns(std::vector<FieldRef>& left_filter,
       indices[0] -= left_schema.num_fields();
       FieldPath corrected_path(std::move(indices));
       if (right_seen_paths.find(*path) == right_seen_paths.end()) {
-        right_filter.push_back(corrected_path);
+        right_filter.emplace_back(corrected_path);
         right_seen_paths.emplace(std::move(corrected_path));
       }
     } else if (left_seen_paths.find(*path) == left_seen_paths.end()) {
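The HashJoinDictBuild::Init hunk earlier in this diff is a use-after-move fix: once `dictionary` has been moved into the `dictionary_` member, the parameter is left in a moved-from (null) state, so the length computation and encoding must read `dictionary_` instead. The distilled pattern, with a hypothetical Holder type for illustration:

#include <memory>
#include <utility>
#include <vector>

class Holder {
 public:
  void Init(std::shared_ptr<std::vector<int>> data) {
    data_ = std::move(data);
    // After the move, `data` is guaranteed to be null for std::shared_ptr;
    // reading through it (as the old code effectively did) would crash:
    //   auto n = data->size();   // BUG: dereferences a null pointer
    auto n = data_->size();  // correct: read through the member
    static_cast<void>(n);
  }

 private:
  std::shared_ptr<std::vector<int>> data_;
};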
@@ -698,7 +698,7 @@ class HashJoinNode : public ExecNode, public TracedNode {
                std::shared_ptr<Schema> output_schema,
                std::unique_ptr<HashJoinSchema> schema_mgr, Expression filter,
                std::unique_ptr<HashJoinImpl> impl)
-      : ExecNode(plan, inputs, {"left", "right"},
+      : ExecNode(plan, std::move(inputs), {"left", "right"},
                  /*output_schema=*/std::move(output_schema)),
         TracedNode(this),
         join_type_(join_options.join_type),
diff --git a/cpp/src/arrow/acero/hash_join_node.h b/cpp/src/arrow/acero/hash_join_node.h
index ad60019ceabc4..19745b8675cf0 100644
--- a/cpp/src/arrow/acero/hash_join_node.h
+++ b/cpp/src/arrow/acero/hash_join_node.h
@@ -65,9 +65,9 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
   std::shared_ptr<Schema> MakeOutputSchema(const std::string& left_field_name_suffix,
                                            const std::string& right_field_name_suffix);
 
-  bool LeftPayloadIsEmpty() { return PayloadIsEmpty(0); }
+  bool LeftPayloadIsEmpty() const { return PayloadIsEmpty(0); }
 
-  bool RightPayloadIsEmpty() { return PayloadIsEmpty(1); }
+  bool RightPayloadIsEmpty() const { return PayloadIsEmpty(1); }
 
   static int kMissingField() {
     return SchemaProjectionMaps<HashJoinProjection>::kMissingField;
   }
@@ -88,7 +88,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
                                 const SchemaProjectionMap& right_to_filter,
                                 const Expression& filter);
 
-  bool PayloadIsEmpty(int side) {
+  bool PayloadIsEmpty(int side) const {
     assert(side == 0 || side == 1);
     return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
   }
diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc
index 9065e286a2228..76ad9c7d650eb 100644
--- a/cpp/src/arrow/acero/hash_join_node_test.cc
+++ b/cpp/src/arrow/acero/hash_join_node_test.cc
@@ -29,6 +29,7 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/row/row_encoder_internal.h"
+#include "arrow/extension/uuid.h"
 #include "arrow/testing/extension_type.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc
index 4d0c8187ac6e2..6c783110af571 100644
--- a/cpp/src/arrow/acero/swiss_join.cc
+++ b/cpp/src/arrow/acero/swiss_join.cc
@@ -1667,7 +1667,7 @@ Result<std::shared_ptr<ArrayData>> JoinResultMaterialize::FlushBuildColumn(
     const std::shared_ptr<DataType>& data_type, const RowArray* row_array,
     int column_id, uint32_t* row_ids) {
   ResizableArrayData output;
-  output.Init(data_type, pool_, bit_util::Log2(num_rows_));
+  RETURN_NOT_OK(output.Init(data_type, pool_, bit_util::Log2(num_rows_)));
 
   for (size_t i = 0; i <= null_ranges_.size(); ++i) {
     int row_id_begin =
@@ -2247,8 +2247,9 @@ Result<ExecBatch> JoinResidualFilter::MaterializeFilterInput(
       build_schemas_->map(HashJoinProjection::FILTER, HashJoinProjection::PAYLOAD);
   for (int i = 0; i < num_build_cols; ++i) {
     ResizableArrayData column_data;
-    column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i), pool_,
-                     bit_util::Log2(num_batch_rows));
+    RETURN_NOT_OK(
+        column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i),
+                         pool_, bit_util::Log2(num_batch_rows)));
     if (auto idx = to_key.get(i); idx != SchemaProjectionMap::kMissingField) {
       RETURN_NOT_OK(build_keys_->DecodeSelected(&column_data, idx, num_batch_rows,
                                                 key_ids_maybe_null, pool_));
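Both swiss_join.cc hunks above switch `ResizableArrayData::Init` call sites from ignoring its `Status` to propagating it. `RETURN_NOT_OK` is the early-return macro Arrow uses internally for this (the public spelling is `ARROW_RETURN_NOT_OK`). A minimal sketch of the shape, where FillBuffer is a made-up fallible step standing in for Init:

#include <arrow/status.h>

arrow::Status FillBuffer();  // stand-in for a fallible step such as Init()

arrow::Status BuildColumn() {
  // If FillBuffer() returns a non-OK Status, return it to the caller at once.
  ARROW_RETURN_NOT_OK(FillBuffer());
  return arrow::Status::OK();
}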
diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h
index 716ae0722069e..e4af67d7e5f0b 100644
--- a/cpp/src/arrow/array/array_base.h
+++ b/cpp/src/arrow/array/array_base.h
@@ -232,6 +232,14 @@ class ARROW_EXPORT Array {
   /// \return DeviceAllocationType
   DeviceAllocationType device_type() const { return data_->device_type(); }
 
+  /// \brief Return the statistics of this Array
+  ///
+  /// This just delegates to calling statistics on the underlying ArrayData
+  /// object which backs this Array.
+  ///
+  /// \return std::shared_ptr<ArrayStatistics>
+  std::shared_ptr<ArrayStatistics> statistics() const { return data_->statistics; }
+
  protected:
   Array() = default;
   ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 32806d9d2edb3..73e0c692432b6 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -3709,6 +3709,132 @@ TEST(TestSwapEndianArrayData, InvalidLength) {
   }
 }
 
+class TestArrayDataStatistics : public ::testing::Test {
+ public:
+  void SetUp() {
+    valids_ = {1, 0, 1, 1};
+    null_count_ = std::count(valids_.begin(), valids_.end(), 0);
+    null_buffer_ = *internal::BytesToBits(valids_);
+    values_ = {1, 0, 3, -4};
+    min_ = *std::min_element(values_.begin(), values_.end());
+    max_ = *std::max_element(values_.begin(), values_.end());
+    values_buffer_ = Buffer::FromVector(values_);
+    data_ = ArrayData::Make(int32(), values_.size(), {null_buffer_, values_buffer_},
+                            null_count_);
+    data_->statistics = std::make_shared<ArrayStatistics>();
+    data_->statistics->null_count = null_count_;
+    data_->statistics->min = min_;
+    data_->statistics->is_min_exact = true;
+    data_->statistics->max = max_;
+    data_->statistics->is_max_exact = true;
+  }
+
+ protected:
+  std::vector<uint8_t> valids_;
+  size_t null_count_;
+  std::shared_ptr<Buffer> null_buffer_;
+  std::vector<int32_t> values_;
+  int64_t min_;
+  int64_t max_;
+  std::shared_ptr<Buffer> values_buffer_;
+  std::shared_ptr<ArrayData> data_;
+};
+
+TEST_F(TestArrayDataStatistics, MoveConstructor) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data(std::move(copied_data));
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyConstructor) {
+  ArrayData copied_data(*data_);
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
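The fixture and tests above (together with the assignment, CopyTo, and Slice variants that follow) pin down the contract: statistics travel with ArrayData through copies and moves. A minimal sketch of attaching and reading statistics through the Array::statistics() accessor added above; AttachAndRead is an illustrative helper, and the field set assumed is the one shown in this patch:

#include <arrow/api.h>
#include <iostream>
#include <memory>
#include <variant>

void AttachAndRead(const std::shared_ptr<arrow::Array>& array) {
  auto stats = std::make_shared<arrow::ArrayStatistics>();
  stats->null_count = array->null_count();
  stats->min = int64_t{0};     // e.g. computed during a scan; stored as the
  stats->is_min_exact = true;  // int64_t alternative of the value variant
  array->data()->statistics = std::move(stats);

  // Consumers read the statistics back through Array::statistics().
  if (auto s = array->statistics(); s && s->min.has_value()) {
    std::cout << "min: " << std::get<int64_t>(*s->min) << std::endl;
  }
}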
+TEST_F(TestArrayDataStatistics, MoveAssignment) {
+  ArrayData copied_data(*data_);
+  ArrayData moved_data;
+  moved_data = std::move(copied_data);
+
+  ASSERT_TRUE(moved_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, moved_data.statistics->null_count.value());
+
+  ASSERT_TRUE(moved_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(moved_data.statistics->min.value()));
+  ASSERT_TRUE(moved_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(moved_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(moved_data.statistics->max.value()));
+  ASSERT_TRUE(moved_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyAssignment) {
+  ArrayData copied_data;
+  copied_data = *data_;
+
+  ASSERT_TRUE(copied_data.statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data.statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data.statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data.statistics->min.value()));
+  ASSERT_TRUE(copied_data.statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data.statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data.statistics->max.value()));
+  ASSERT_TRUE(copied_data.statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, CopyTo) {
+  ASSERT_OK_AND_ASSIGN(auto copied_data,
+                       data_->CopyTo(arrow::default_cpu_memory_manager()));
+
+  ASSERT_TRUE(copied_data->statistics->null_count.has_value());
+  ASSERT_EQ(null_count_, copied_data->statistics->null_count.value());
+
+  ASSERT_TRUE(copied_data->statistics->min.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_EQ(min_, std::get<int64_t>(copied_data->statistics->min.value()));
+  ASSERT_TRUE(copied_data->statistics->is_min_exact);
+
+  ASSERT_TRUE(copied_data->statistics->max.has_value());
+  ASSERT_TRUE(std::holds_alternative<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_EQ(max_, std::get<int64_t>(copied_data->statistics->max.value()));
+  ASSERT_TRUE(copied_data->statistics->is_max_exact);
+}
+
+TEST_F(TestArrayDataStatistics, Slice) {
+  auto sliced_data = data_->Slice(0, 1);
+  ASSERT_FALSE(sliced_data->statistics);
+}
+
 template <typename TYPE>
 class TestPrimitiveArray : public ::testing::Test {
  public:
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index 83eeb56c496cf..8e29297a8c175 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -165,6 +165,8 @@ Result<std::shared_ptr<ArrayData>> CopyToImpl(const ArrayData& data,
     ARROW_ASSIGN_OR_RAISE(output->dictionary, CopyToImpl(*data.dictionary, to, copy_fn));
   }
 
+  output->statistics = data.statistics;
+
   return output;
 }
 }  // namespace
@@ -195,6 +197,7 @@ std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
   } else {
     copy->null_count = null_count != 0 ?
kUnknownNullCount : 0;
   }
+  copy->statistics = nullptr;
   return copy;
 }
 
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index e0508fe6980a7..1e6ee9a1d32ff 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -24,6 +24,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/array/statistics.h"
 #include "arrow/buffer.h"
 #include "arrow/result.h"
 #include "arrow/type.h"
@@ -152,7 +153,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(std::move(other.buffers)),
         child_data(std::move(other.child_data)),
-        dictionary(std::move(other.dictionary)) {
+        dictionary(std::move(other.dictionary)),
+        statistics(std::move(other.statistics)) {
     SetNullCount(other.null_count);
   }
 
@@ -163,7 +165,8 @@ struct ARROW_EXPORT ArrayData {
         offset(other.offset),
         buffers(other.buffers),
         child_data(other.child_data),
-        dictionary(other.dictionary) {
+        dictionary(other.dictionary),
+        statistics(other.statistics) {
     SetNullCount(other.null_count);
   }
 
@@ -176,6 +179,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = std::move(other.buffers);
     child_data = std::move(other.child_data);
     dictionary = std::move(other.dictionary);
+    statistics = std::move(other.statistics);
     return *this;
   }
 
@@ -188,6 +192,7 @@ struct ARROW_EXPORT ArrayData {
     buffers = other.buffers;
    child_data = other.child_data;
     dictionary = other.dictionary;
+    statistics = other.statistics;
     return *this;
   }
 
@@ -274,6 +279,18 @@ struct ARROW_EXPORT ArrayData {
   }
 
   /// \brief Construct a zero-copy slice of the data with the given offset and length
+  ///
+  /// The associated `ArrayStatistics` is always discarded in a sliced
+  /// `ArrayData`, because statistics computed for the original
+  /// `ArrayData` may be invalid for the slice. If you want to reuse
+  /// the original statistics, you need to do so yourself.
+  ///
+  /// Even if the specified slice covers the same range as the original
+  /// `ArrayData` (so the statistics would still apply), the associated
+  /// `ArrayStatistics` is discarded as well. Use `Copy()` instead for
+  /// that case.
   std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
 
   /// \brief Input-checking variant of Slice
@@ -390,6 +407,9 @@ struct ARROW_EXPORT ArrayData {
   // The dictionary for this Array, if any. Only used for dictionary type
   std::shared_ptr<ArrayData> dictionary;
+
+  // The statistics for this Array.
+  std::shared_ptr<ArrayStatistics> statistics;
 };
 
 /// \brief A non-owning Buffer reference
diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc
index 55eec53ced1c7..854127480744e 100644
--- a/cpp/src/arrow/chunk_resolver.cc
+++ b/cpp/src/arrow/chunk_resolver.cc
@@ -60,42 +60,38 @@ inline std::vector<int64_t> MakeChunksOffsets(const std::vector<T>& chunks) {
 template <typename IndexType>
 void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets,
                        int64_t n_indices, const IndexType* logical_index_vec,
-                       IndexType* out_chunk_index_vec, IndexType chunk_hint,
-                       IndexType* out_index_in_chunk_vec) {
+                       TypedChunkLocation<IndexType>* out_chunk_location_vec,
+                       IndexType chunk_hint) {
   auto* offsets = reinterpret_cast<const uint64_t*>(signed_offsets);
   const auto num_chunks = static_cast<IndexType>(num_offsets - 1);
   // chunk_hint in [0, num_offsets) per the precondition.
  for (int64_t i = 0; i < n_indices; i++) {
-    const auto index = static_cast<uint64_t>(logical_index_vec[i]);
+    auto typed_logical_index = logical_index_vec[i];
+    const auto index = static_cast<uint64_t>(typed_logical_index);
+    // use or update chunk_hint
     if (index >= offsets[chunk_hint] &&
         (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) {
-      out_chunk_index_vec[i] = chunk_hint;  // hint is correct!
-      continue;
+      // hint is correct!
+    } else {
+      // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
+      auto chunk_index =
+          ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
+      chunk_hint = static_cast<IndexType>(chunk_index);
     }
-    // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
-    auto chunk_index =
-        ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
-    chunk_hint = static_cast<IndexType>(chunk_index);
-    out_chunk_index_vec[i] = chunk_hint;
-  }
-  if (out_index_in_chunk_vec != NULLPTR) {
-    for (int64_t i = 0; i < n_indices; i++) {
-      auto logical_index = logical_index_vec[i];
-      auto chunk_index = out_chunk_index_vec[i];
-      // chunk_index is in [0, chunks.size()] no matter what the
-      // value of logical_index is, so it's always safe to dereference
-      // offset_ as it contains chunks.size()+1 values.
-      out_index_in_chunk_vec[i] =
-          logical_index - static_cast<IndexType>(offsets[chunk_index]);
+    out_chunk_location_vec[i].chunk_index = chunk_hint;
+    // chunk_index is in [0, chunks.size()] no matter what the
+    // value of logical_index is, so it's always safe to dereference
+    // offset_ as it contains chunks.size()+1 values.
+    out_chunk_location_vec[i].index_in_chunk =
+        typed_logical_index - static_cast<IndexType>(offsets[chunk_hint]);
 #if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)
-    // Make it more likely that Valgrind/ASAN can catch an invalid memory
-    // access by poisoning out_index_in_chunk_vec[i] when the logical
-    // index is out-of-bounds.
-    if (chunk_index == num_chunks) {
-      out_index_in_chunk_vec[i] = std::numeric_limits<IndexType>::max();
-    }
-#endif
+    // Make it more likely that Valgrind/ASAN can catch an invalid memory
+    // access by poisoning the index-in-chunk value when the logical
+    // index is out-of-bounds.
+    if (chunk_hint == num_chunks) {
+      out_chunk_location_vec[i].index_in_chunk = std::numeric_limits<IndexType>::max();
+    }
+#endif
   }
 }
 
@@ -130,31 +126,31 @@ ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept {
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* logical_index_vec,
-                                    uint8_t* out_chunk_index_vec, uint8_t chunk_hint,
-                                    uint8_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint8_t>* out_chunk_location_vec,
+                                    uint8_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* logical_index_vec,
-                                    uint32_t* out_chunk_index_vec, uint32_t chunk_hint,
-                                    uint32_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint32_t>* out_chunk_location_vec,
+                                    uint32_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* logical_index_vec,
-                                    uint16_t* out_chunk_index_vec, uint16_t chunk_hint,
-                                    uint16_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint16_t>* out_chunk_location_vec,
+                                    uint16_t chunk_hint) const {
   ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_index_vec,
-                                    uint64_t* out_chunk_index_vec, uint64_t chunk_hint,
-                                    uint64_t* out_index_in_chunk_vec) const {
+                                    TypedChunkLocation<uint64_t>* out_chunk_location_vec,
+                                    uint64_t chunk_hint) const {
  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
-                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+                    out_chunk_location_vec, chunk_hint);
 }
 
 }  // namespace arrow::internal
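ResolveManyInline above amortizes repeated lookups: each index is first tested against the caller-carried hint chunk, and only on a miss does it fall back to binary search over the offsets table and update the hint. A self-contained sketch of that hint-then-bisect strategy (Locate is a hypothetical helper, not Arrow's Bisect):

#include <cstdint>
#include <vector>

// offsets has num_chunks + 1 entries, with offsets.front() == 0. Returns the
// chunk index containing `index`, or num_chunks when `index` is out of bounds.
size_t Locate(const std::vector<int64_t>& offsets, int64_t index, size_t* hint) {
  const size_t num_chunks = offsets.size() - 1;
  if (index >= offsets[*hint] &&
      (*hint == num_chunks || index < offsets[*hint + 1])) {
    return *hint;  // fast path: same chunk as the previous lookup
  }
  size_t lo = 0;
  size_t hi = offsets.size();
  while (hi - lo > 1) {  // upper-bound bisection over the offsets table
    const size_t mid = lo + (hi - lo) / 2;
    if (index >= offsets[mid]) {
      lo = mid;
    } else {
      hi = mid;
    }
  }
  *hint = lo;  // remember the hit for the next, likely nearby, index
  return lo;
}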
diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h
index a2a3d5a864243..83fda62387fe1 100644
--- a/cpp/src/arrow/chunk_resolver.h
+++ b/cpp/src/arrow/chunk_resolver.h
@@ -31,28 +31,34 @@ namespace arrow::internal {
 
 struct ChunkResolver;
 
-struct ChunkLocation {
+template <typename IndexType>
+struct TypedChunkLocation {
   /// \brief Index of the chunk in the array of chunks
   ///
   /// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used
   /// to represent out-of-bounds locations.
-  int64_t chunk_index = 0;
+  IndexType chunk_index = 0;
 
   /// \brief Index of the value in the chunk
   ///
   /// The value is UNDEFINED if chunk_index >= chunks.size()
-  int64_t index_in_chunk = 0;
+  IndexType index_in_chunk = 0;
 
-  ChunkLocation() = default;
+  TypedChunkLocation() = default;
 
-  ChunkLocation(int64_t chunk_index, int64_t index_in_chunk)
-      : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {}
+  TypedChunkLocation(IndexType chunk_index, IndexType index_in_chunk)
+      : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {
+    static_assert(sizeof(TypedChunkLocation<IndexType>) == 2 * sizeof(IndexType));
+    static_assert(alignof(TypedChunkLocation<IndexType>) == alignof(IndexType));
+  }
 
-  bool operator==(ChunkLocation other) const {
+  bool operator==(TypedChunkLocation other) const {
     return chunk_index == other.chunk_index && index_in_chunk == other.index_in_chunk;
   }
 };
 
+using ChunkLocation = TypedChunkLocation<int64_t>;
+
 /// \brief An utility that incrementally resolves logical indices into
 /// physical indices in a chunked array.
 struct ARROW_EXPORT ChunkResolver {
@@ -144,26 +150,25 @@ struct ARROW_EXPORT ChunkResolver {
   ///
   /// \pre 0 <= logical_index_vec[i] < logical_array_length()
   ///      (for well-defined and valid chunk index results)
-  /// \pre out_chunk_index_vec has space for `n_indices`
+  /// \pre out_chunk_location_vec has space for `n_indices` locations
   /// \pre chunk_hint in [0, chunks.size()]
-  /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n)
+  /// \post out_chunk_location_vec[i].chunk_index in [0, chunks.size()] for i in [0, n)
   /// \post if logical_index_vec[i] >= chunked_array.length(), then
-  ///       out_chunk_index_vec[i] == chunks.size()
-  ///       and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds)
-  /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and
-  ///       out_index_in_chunk_vec[i] are UNDEFINED
+  ///       out_chunk_location_vec[i].chunk_index == chunks.size()
+  ///       and out_chunk_location_vec[i].index_in_chunk is UNDEFINED (can be
+  ///       out-of-bounds)
+  /// \post if logical_index_vec[i] < 0, then both values in out_chunk_location_vec[i]
+  ///       are UNDEFINED
   ///
   /// \param n_indices The number of logical indices to resolve
   /// \param logical_index_vec The logical indices to resolve
-  /// \param out_chunk_index_vec The output array where the chunk indices will be written
+  /// \param out_chunk_location_vec The output array where the locations will be written
   /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany
-  /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the
-  ///        within-chunk indices will be written
   /// \return false iff chunks.size() > std::numeric_limits<IndexType>::max()
   template <typename IndexType>
   [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* logical_index_vec,
-                                 IndexType* out_chunk_index_vec, IndexType chunk_hint = 0,
-                                 IndexType* out_index_in_chunk_vec = NULLPTR) const {
+                                 TypedChunkLocation<IndexType>* out_chunk_location_vec,
+                                 IndexType chunk_hint = 0) const {
     if constexpr (sizeof(IndexType) < sizeof(uint64_t)) {
       // The max value returned by Bisect is `offsets.size() - 1` (= chunks.size()).
      constexpr uint64_t kMaxIndexTypeValue = std::numeric_limits<IndexType>::max();
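With locations now packed into TypedChunkLocation, one ResolveMany call writes a {chunk_index, index_in_chunk} pair per input index instead of filling two separate arrays. A usage sketch against the API above (illustrative: chunk_resolver.h is an internal header, and the resolver is assumed to have been built from some chunked array's chunks):

#include <arrow/chunk_resolver.h>
#include <cstdint>
#include <vector>

bool ResolveAll(const arrow::internal::ChunkResolver& resolver,
                const std::vector<uint32_t>& logical_indices) {
  // One location per input index, filled in a single pass.
  std::vector<arrow::internal::TypedChunkLocation<uint32_t>> locations(
      logical_indices.size());
  // Returns false only if the chunk count does not fit in uint32_t.
  return resolver.ResolveMany(static_cast<int64_t>(logical_indices.size()),
                              logical_indices.data(), locations.data(),
                              /*chunk_hint=*/0);
}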
@@ -188,13 +193,11 @@ struct ARROW_EXPORT ChunkResolver {
       // logical index in the chunked array.
       using U = std::make_unsigned_t<IndexType>;
       ResolveManyImpl(n_indices, reinterpret_cast<const U*>(logical_index_vec),
-                      reinterpret_cast<U*>(out_chunk_index_vec),
-                      static_cast<U>(chunk_hint),
-                      reinterpret_cast<U*>(out_index_in_chunk_vec));
+                      reinterpret_cast<TypedChunkLocation<U>*>(out_chunk_location_vec),
+                      static_cast<U>(chunk_hint));
     } else {
       static_assert(std::is_unsigned_v<IndexType>);
-      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_index_vec, chunk_hint,
-                      out_index_in_chunk_vec);
+      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_location_vec, chunk_hint);
     }
     return true;
   }
@@ -226,10 +229,14 @@ struct ARROW_EXPORT ChunkResolver {
   /// \pre all the pre-conditions of ChunkResolver::ResolveMany()
   /// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
-  void ResolveManyImpl(int64_t, const uint8_t*, uint8_t*, uint8_t, uint8_t*) const;
-  void ResolveManyImpl(int64_t, const uint16_t*, uint16_t*, uint16_t, uint16_t*) const;
-  void ResolveManyImpl(int64_t, const uint32_t*, uint32_t*, uint32_t, uint32_t*) const;
-  void ResolveManyImpl(int64_t, const uint64_t*, uint64_t*, uint64_t, uint64_t*) const;
+  void ResolveManyImpl(int64_t, const uint8_t*, TypedChunkLocation<uint8_t>*,
+                       uint8_t) const;
+  void ResolveManyImpl(int64_t, const uint16_t*, TypedChunkLocation<uint16_t>*,
+                       uint16_t) const;
+  void ResolveManyImpl(int64_t, const uint32_t*, TypedChunkLocation<uint32_t>*,
+                       uint32_t) const;
+  void ResolveManyImpl(int64_t, const uint64_t*, TypedChunkLocation<uint64_t>*,
+                       uint64_t) const;
 
  public:
   /// \brief Find the index of the chunk that contains the logical index.
diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index c36b736d5d5df..dd6aa51534fcb 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -27,6 +27,7 @@
 #include "arrow/array/array_nested.h"
 #include "arrow/array/util.h"
 #include "arrow/array/validate.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/pretty_print.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
@@ -86,6 +87,18 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::MakeEmpty(
   return std::make_shared<ChunkedArray>(std::move(new_chunks));
 }
 
+DeviceAllocationTypeSet ChunkedArray::device_types() const {
+  if (chunks_.empty()) {
+    // An empty ChunkedArray is considered to be CPU-only.
+    return DeviceAllocationTypeSet::CpuOnly();
+  }
+  DeviceAllocationTypeSet set;
+  for (const auto& chunk : chunks_) {
+    set.add(chunk->device_type());
+  }
+  return set;
+}
+
 bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) const {
   if (length_ != other.length()) {
     return false;
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 5d300861d85c2..c65b6cb6e227f 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -25,6 +25,7 @@
 
 #include "arrow/chunk_resolver.h"
 #include "arrow/compare.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
@@ -116,6 +117,13 @@ class ARROW_EXPORT ChunkedArray {
   /// \return an ArrayVector of chunks
   const ArrayVector& chunks() const { return chunks_; }
 
+  /// \return The set of device allocation types used by the chunks in this
+  ///         chunked array.
+  DeviceAllocationTypeSet device_types() const;
+
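The device_types() accessor declared above, together with the is_cpu() convenience declared just below it, lets callers verify device residency before touching raw chunk buffers. A usage sketch (the CheckCpuOnly helper is illustrative, not part of the patch):

#include <arrow/api.h>

// Guard CPU-only code paths against chunks living in non-CPU memory.
arrow::Status CheckCpuOnly(const arrow::ChunkedArray& chunked) {
  if (!chunked.is_cpu()) {
    return arrow::Status::Invalid("expected all chunks in CPU-accessible memory");
  }
  return arrow::Status::OK();
}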
+  /// \return true if all chunks are allocated on CPU-accessible memory.
+  bool is_cpu() const { return device_types().is_cpu_only(); }
+
   /// \brief Construct a zero-copy slice of the chunked array with the
   /// indicated offset and length
   ///
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index e9cc283b53cd5..bf9d4af7c7bb0 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -37,6 +37,7 @@ namespace arrow {
 
 using internal::ChunkLocation;
 using internal::ChunkResolver;
+using internal::TypedChunkLocation;
 
 class TestChunkedArray : public ::testing::Test {
  protected:
@@ -61,12 +62,17 @@ TEST_F(TestChunkedArray, Make) {
                        ChunkedArray::Make({}, int64()));
   AssertTypeEqual(*int64(), *result->type());
   ASSERT_EQ(result->num_chunks(), 0);
+  // Empty chunked arrays are treated as CPU-allocated.
+  ASSERT_TRUE(result->is_cpu());
 
   auto chunk0 = ArrayFromJSON(int8(), "[0, 1, 2]");
   auto chunk1 = ArrayFromJSON(int16(), "[3, 4, 5]");
 
   ASSERT_OK_AND_ASSIGN(result, ChunkedArray::Make({chunk0, chunk0}));
   ASSERT_OK_AND_ASSIGN(auto result2, ChunkedArray::Make({chunk0, chunk0}, int8()));
+  // All chunks are CPU-accessible.
+  ASSERT_TRUE(result->is_cpu());
+  ASSERT_TRUE(result2->is_cpu());
   AssertChunkedEqual(*result, *result2);
 
   ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0, chunk1}));
@@ -375,24 +381,26 @@ class TestChunkResolverMany : public ::testing::Test {
   Result<std::vector<ChunkLocation>> ResolveMany(
       const ChunkResolver& resolver, const std::vector<IndexType>& logical_index_vec) {
     const size_t n = logical_index_vec.size();
-    std::vector<IndexType> chunk_index_vec;
-    chunk_index_vec.resize(n);
-    std::vector<IndexType> index_in_chunk_vec;
-    index_in_chunk_vec.resize(n);
+    std::vector<TypedChunkLocation<IndexType>> chunk_location_vec;
+    chunk_location_vec.resize(n);
     bool valid = resolver.ResolveMany(
-        static_cast<int64_t>(n), logical_index_vec.data(), chunk_index_vec.data(), 0,
-        index_in_chunk_vec.data());
+        static_cast<int64_t>(n), logical_index_vec.data(), chunk_location_vec.data(), 0);
     if (ARROW_PREDICT_FALSE(!valid)) {
       return Status::Invalid("index type doesn't fit possible chunk indexes");
     }
-    std::vector<ChunkLocation> locations;
-    locations.reserve(n);
-    for (size_t i = 0; i < n; i++) {
-      auto chunk_index = static_cast<int64_t>(chunk_index_vec[i]);
-      auto index_in_chunk = static_cast<int64_t>(index_in_chunk_vec[i]);
-      locations.emplace_back(chunk_index, index_in_chunk);
+    if constexpr (std::is_same<IndexType, int64_t>::value) {
+      return chunk_location_vec;
+    } else {
+      std::vector<ChunkLocation> locations;
+      locations.reserve(n);
+      for (size_t i = 0; i < n; i++) {
+        auto loc = chunk_location_vec[i];
+        auto chunk_index = static_cast<int64_t>(loc.chunk_index);
+        auto index_in_chunk = static_cast<int64_t>(loc.index_in_chunk);
+        locations.emplace_back(chunk_index, index_in_chunk);
+      }
+      return locations;
     }
-    return locations;
   }
 
   void CheckResolveMany(const ChunkResolver& resolver,
diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index e1a2e8c5d8879..0478a3d1e801a 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -30,6 +30,7 @@
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/datum.h"
+#include "arrow/device_allocation_type_set.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/tracing_internal.h"
diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc
index 5c87ef2cd0561..5e7461cc52d0e 100644
--- a/cpp/src/arrow/compute/kernel.cc
+++ b/cpp/src/arrow/compute/kernel.cc
@@ -24,6 +24,7 @@
 
 #include "arrow/buffer.h"
 #include "arrow/compute/exec.h"
+#include "arrow/device_allocation_type_set.h"
 #include
"arrow/result.h" #include "arrow/type_traits.h" #include "arrow/util/bit_util.h" diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h index 1adb3e96c97c8..cfa1cd8193f36 100644 --- a/cpp/src/arrow/compute/kernel.h +++ b/cpp/src/arrow/compute/kernel.h @@ -31,6 +31,7 @@ #include "arrow/buffer.h" #include "arrow/compute/exec.h" #include "arrow/datum.h" +#include "arrow/device_allocation_type_set.h" #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 1fbcd6a249093..b545d8bcc1003 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -23,7 +23,9 @@ #include "arrow/util/cpu_info.h" #include "arrow/util/hashing.h" -#include +// Include templated definitions for aggregate kernels that must compiled here +// with the SIMD level configured for this compilation unit in the build. +#include "arrow/compute/kernels/aggregate_basic.inc.cc" // NOLINT(build/include) namespace arrow { namespace compute { @@ -276,11 +278,6 @@ struct SumImplDefault : public SumImpl { using SumImpl::SumImpl; }; -template -struct MeanImplDefault : public MeanImpl { - using MeanImpl::MeanImpl; -}; - Result> SumInit(KernelContext* ctx, const KernelInitArgs& args) { SumLikeInit visitor( @@ -289,6 +286,14 @@ Result> SumInit(KernelContext* ctx, return visitor.Create(); } +// ---------------------------------------------------------------------- +// Mean implementation + +template +struct MeanImplDefault : public MeanImpl { + using MeanImpl::MeanImpl; +}; + Result> MeanInit(KernelContext* ctx, const KernelInitArgs& args) { MeanKernelInit visitor( @@ -482,8 +487,8 @@ void AddFirstOrLastAggKernel(ScalarAggregateFunction* func, // ---------------------------------------------------------------------- // MinMax implementation -Result> MinMaxInit(KernelContext* ctx, - const KernelInitArgs& args) { +Result> MinMaxInitDefault(KernelContext* ctx, + const KernelInitArgs& args) { ARROW_ASSIGN_OR_RAISE(TypeHolder out_type, args.kernel->signature->out_type().Resolve(ctx, args.inputs)); MinMaxInitState visitor( @@ -532,13 +537,13 @@ struct BooleanAnyImpl : public ScalarAggregator { } if (batch[0].is_scalar()) { const Scalar& scalar = *batch[0].scalar; - this->has_nulls = !scalar.is_valid; - this->any = scalar.is_valid && checked_cast(scalar).value; - this->count += scalar.is_valid; + this->has_nulls |= !scalar.is_valid; + this->any |= scalar.is_valid && checked_cast(scalar).value; + this->count += scalar.is_valid * batch.length; return Status::OK(); } const ArraySpan& data = batch[0].array; - this->has_nulls = data.GetNullCount() > 0; + this->has_nulls |= data.GetNullCount() > 0; this->count += data.length - data.GetNullCount(); arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[0].data, data.offset, data.buffers[1].data, data.offset, @@ -603,13 +608,13 @@ struct BooleanAllImpl : public ScalarAggregator { } if (batch[0].is_scalar()) { const Scalar& scalar = *batch[0].scalar; - this->has_nulls = !scalar.is_valid; - this->count += scalar.is_valid; - this->all = !scalar.is_valid || checked_cast(scalar).value; + this->has_nulls |= !scalar.is_valid; + this->count += scalar.is_valid * batch.length; + this->all &= !scalar.is_valid || checked_cast(scalar).value; return Status::OK(); } const ArraySpan& data = batch[0].array; - this->has_nulls = data.GetNullCount() > 0; + this->has_nulls |= 
data.GetNullCount() > 0; this->count += data.length - data.GetNullCount(); arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[1].data, data.offset, data.buffers[0].data, data.offset, @@ -1114,14 +1119,14 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { // Add min max function func = std::make_shared("min_max", Arity::Unary(), min_max_doc, &default_scalar_aggregate_options); - AddMinMaxKernels(MinMaxInit, {null(), boolean()}, func.get()); - AddMinMaxKernels(MinMaxInit, NumericTypes(), func.get()); - AddMinMaxKernels(MinMaxInit, TemporalTypes(), func.get()); - AddMinMaxKernels(MinMaxInit, BaseBinaryTypes(), func.get()); - AddMinMaxKernel(MinMaxInit, Type::FIXED_SIZE_BINARY, func.get()); - AddMinMaxKernel(MinMaxInit, Type::INTERVAL_MONTHS, func.get()); - AddMinMaxKernel(MinMaxInit, Type::DECIMAL128, func.get()); - AddMinMaxKernel(MinMaxInit, Type::DECIMAL256, func.get()); + AddMinMaxKernels(MinMaxInitDefault, {null(), boolean()}, func.get()); + AddMinMaxKernels(MinMaxInitDefault, NumericTypes(), func.get()); + AddMinMaxKernels(MinMaxInitDefault, TemporalTypes(), func.get()); + AddMinMaxKernels(MinMaxInitDefault, BaseBinaryTypes(), func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::FIXED_SIZE_BINARY, func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::INTERVAL_MONTHS, func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::DECIMAL128, func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::DECIMAL256, func.get()); // Add the SIMD variants for min max #if defined(ARROW_HAVE_RUNTIME_AVX2) if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) { diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc new file mode 100644 index 0000000000000..f2151e0a9e029 --- /dev/null +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc @@ -0,0 +1,1025 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// .inc.cc file to be included in compilation unit where kernels are meant to be +// compiled auto-vectorized by the compiler with different SIMD levels passed +// as compiler flags. +// +// It contains no includes to avoid double inclusion in the compilation unit +// that includes this .inc.cc file. 
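This include-free layout is what lets the same kernel templates be compiled several times, once per SIMD level: each translation unit includes this source after its own headers and is built with its own vectorization flags. A schematic of the pattern (file and namespace names below are illustrative, not Arrow's actual build wiring):

// kernels.inc.cc -- shared, deliberately include-free template bodies:
//   template <typename T>
//   T SumSpan(const T* values, int64_t n) {
//     T sum{};
//     for (int64_t i = 0; i < n; ++i) sum += values[i];  // auto-vectorizable loop
//     return sum;
//   }
//
// kernels_baseline.cc -- compiled with default flags:
//   #include <cstdint>
//   namespace baseline {
//   #include "kernels.inc.cc"
//   }
//
// kernels_avx2.cc -- identical source, compiled with -mavx2:
//   #include <cstdint>
//   namespace avx2 {
//   #include "kernels.inc.cc"
//   }
//
// A runtime dispatcher can then pick baseline::SumSpan or avx2::SumSpan after
// CPU feature detection, which is the role the ARROW_HAVE_RUNTIME_AVX2 block
// in aggregate_basic.cc plays for the min_max kernels above.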
+ +#include +#include +#include +#include +#include + +#include "arrow/compute/api_aggregate.h" +#include "arrow/compute/kernels/aggregate_internal.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/align_util.h" +#include "arrow/util/bit_block_counter.h" +#include "arrow/util/decimal.h" + +namespace arrow::compute::internal { +namespace { + +// ---------------------------------------------------------------------- +// Sum implementation + +template ::Type> +struct SumImpl : public ScalarAggregator { + using ThisType = SumImpl; + using CType = typename TypeTraits::CType; + using SumType = ResultType; + using SumCType = typename TypeTraits::CType; + using OutputType = typename TypeTraits::ScalarType; + + SumImpl(std::shared_ptr out_type, ScalarAggregateOptions options_) + : out_type(std::move(out_type)), options(std::move(options_)) {} + + Status Consume(KernelContext*, const ExecSpan& batch) override { + if (batch[0].is_array()) { + const ArraySpan& data = batch[0].array; + this->count += data.length - data.GetNullCount(); + this->nulls_observed = this->nulls_observed || data.GetNullCount(); + + if (!options.skip_nulls && this->nulls_observed) { + // Short-circuit + return Status::OK(); + } + + if (is_boolean_type::value) { + this->sum += GetTrueCount(data); + } else { + this->sum += SumArray(data); + } + } else { + const Scalar& data = *batch[0].scalar; + this->count += data.is_valid * batch.length; + this->nulls_observed = this->nulls_observed || !data.is_valid; + if (data.is_valid) { + this->sum += internal::UnboxScalar::Unbox(data) * batch.length; + } + } + return Status::OK(); + } + + Status MergeFrom(KernelContext*, KernelState&& src) override { + const auto& other = checked_cast(src); + this->count += other.count; + this->sum += other.sum; + this->nulls_observed = this->nulls_observed || other.nulls_observed; + return Status::OK(); + } + + Status Finalize(KernelContext*, Datum* out) override { + if ((!options.skip_nulls && this->nulls_observed) || + (this->count < options.min_count)) { + out->value = std::make_shared(out_type); + } else { + out->value = std::make_shared(this->sum, out_type); + } + return Status::OK(); + } + + size_t count = 0; + bool nulls_observed = false; + SumCType sum = 0; + std::shared_ptr out_type; + ScalarAggregateOptions options; +}; + +template +struct NullImpl : public ScalarAggregator { + using ScalarType = typename TypeTraits::ScalarType; + + explicit NullImpl(const ScalarAggregateOptions& options_) : options(options_) {} + + Status Consume(KernelContext*, const ExecSpan& batch) override { + if (batch[0].is_scalar() || batch[0].array.GetNullCount() > 0) { + // If the batch is a scalar or an array with elements, set is_empty to false + is_empty = false; + } + return Status::OK(); + } + + Status MergeFrom(KernelContext*, KernelState&& src) override { + const auto& other = checked_cast(src); + this->is_empty &= other.is_empty; + return Status::OK(); + } + + Status Finalize(KernelContext*, Datum* out) override { + if ((options.skip_nulls || this->is_empty) && options.min_count == 0) { + // Return 0 if the remaining data is empty + out->value = output_empty(); + } else { + out->value = MakeNullScalar(TypeTraits::type_singleton()); + } + return Status::OK(); + } + + virtual std::shared_ptr output_empty() = 0; + + bool is_empty = true; + ScalarAggregateOptions options; +}; + +template +struct NullSumImpl : public NullImpl { + using ScalarType = 
typename TypeTraits<SumType>::ScalarType;
+
+  explicit NullSumImpl(const ScalarAggregateOptions& options_)
+      : NullImpl<SumType>(options_) {}
+
+  std::shared_ptr<Scalar> output_empty() override {
+    return std::make_shared<ScalarType>(0);
+  }
+};
+
+template